From a175314c3e5827eb193872241446f2f8f5c9d33c Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 20:07:14 +0200 Subject: Adding upstream version 1:10.5.12. Signed-off-by: Daniel Baumann --- storage/rocksdb/.clang-format | 137 + storage/rocksdb/.gitignore | 2 + storage/rocksdb/CMakeLists.txt | 276 + storage/rocksdb/README | 50 + storage/rocksdb/atomic_stat.h | 94 + storage/rocksdb/build_rocksdb.cmake | 506 + storage/rocksdb/event_listener.cc | 96 + storage/rocksdb/event_listener.h | 49 + storage/rocksdb/get_rocksdb_files.sh | 27 + storage/rocksdb/ha_rocksdb.cc | 14638 ++++++++++++ storage/rocksdb/ha_rocksdb.h | 1069 + storage/rocksdb/ha_rocksdb_proto.h | 103 + storage/rocksdb/logger.h | 85 + storage/rocksdb/myrocks_hotbackup.py | 697 + storage/rocksdb/mysql-test/rocksdb/combinations | 5 + .../rocksdb/include/autoinc_crash_safe.inc | 150 + .../mysql-test/rocksdb/include/bulk_load.inc | 165 + .../rocksdb/include/bulk_load_unsorted.inc | 143 + .../rocksdb/include/bypass_create_table.inc | 298 + .../rocksdb/include/ddl_high_priority.inc | 174 + .../rocksdb/include/ddl_high_priority_module.inc | 141 + .../mysql-test/rocksdb/include/deadlock_stats.inc | 52 + .../mysql-test/rocksdb/include/dup_key_update.inc | 69 + .../mysql-test/rocksdb/include/group_min_max.inc | 1438 ++ .../mysql-test/rocksdb/include/have_direct_io.inc | 23 + .../mysql-test/rocksdb/include/have_rocksdb.inc | 10 + .../mysql-test/rocksdb/include/have_rocksdb.opt | 12 + .../rocksdb/include/have_rocksdb_default.inc | 10 + .../rocksdb/include/have_rocksdb_replication.inc | 11 + .../rocksdb/include/have_write_committed.inc | 3 + .../rocksdb/include/have_write_prepared.inc | 3 + .../mysql-test/rocksdb/include/index_merge1.inc | 910 + .../mysql-test/rocksdb/include/index_merge2.inc | 520 + .../rocksdb/include/index_merge_2sweeps.inc | 80 + .../mysql-test/rocksdb/include/index_merge_ror.inc | 463 + .../rocksdb/include/index_merge_ror_cpk.inc | 178 + .../rocksdb/include/locking_issues_case1_1.inc 
| 51 + .../rocksdb/include/locking_issues_case1_2.inc | 48 + .../rocksdb/include/locking_issues_case2.inc | 97 + .../rocksdb/include/locking_issues_case3.inc | 71 + .../rocksdb/include/locking_issues_case4.inc | 69 + .../rocksdb/include/locking_issues_case5.inc | 77 + .../rocksdb/include/locking_issues_case6.inc | 77 + .../rocksdb/include/locking_issues_case7.inc | 89 + .../include/prefix_index_only_query_check.inc | 21 + .../include/restart_mysqld_with_invalid_option.inc | 8 + .../rocksdb/include/restart_mysqld_with_option.inc | 31 + .../mysql-test/rocksdb/include/rocksdb_icp.inc | 199 + .../mysql-test/rocksdb/include/simple_deadlock.inc | 29 + .../rocksdb/include/start_mysqld_with_option.inc | 14 + .../rocksdb/include/use_direct_io_option.inc | 24 + storage/rocksdb/mysql-test/rocksdb/my.cnf | 19 + storage/rocksdb/mysql-test/rocksdb/r/1st.result | 22 + .../mysql-test/rocksdb/r/2pc_group_commit.result | 101 + .../mysql-test/rocksdb/r/add_index_inplace.result | 505 + .../rocksdb/r/add_index_inplace_cardinality.result | 24 + .../rocksdb/r/add_index_inplace_crash.result | 93 + .../r/add_index_inplace_sstfilewriter.result | 79 + .../rocksdb/r/add_unique_index_inplace.result | 111 + .../rocksdb/r/allow_no_pk_concurrent_insert.result | 7 + .../rocksdb/r/allow_no_primary_key.result | 298 + .../rocksdb/r/allow_no_primary_key_with_sk.result | 806 + .../r/allow_to_start_after_corruption.result | 38 + .../mysql-test/rocksdb/r/alter_table.result | 183 + .../mysql-test/rocksdb/r/analyze_table.result | 61 + .../mysql-test/rocksdb/r/apply_changes_iter.result | 64 + .../mysql-test/rocksdb/r/autoinc_crash_safe.result | 132 + .../rocksdb/r/autoinc_crash_safe_partition.result | 132 + .../mysql-test/rocksdb/r/autoinc_debug.result | 99 + .../mysql-test/rocksdb/r/autoinc_secondary.result | 17 + .../mysql-test/rocksdb/r/autoinc_vars.result | 199 + .../rocksdb/r/autoinc_vars_thread.result | 39 + .../rocksdb/r/autoinc_vars_thread_2.result | 98 + .../rocksdb/r/binlog_rotate_crash.result | 
19 + .../mysql-test/rocksdb/r/blind_delete_rc.result | 87 + .../mysql-test/rocksdb/r/blind_delete_rr.result | 87 + .../mysql-test/rocksdb/r/bloomfilter.result | 2042 ++ .../mysql-test/rocksdb/r/bloomfilter2.result | 71 + .../mysql-test/rocksdb/r/bloomfilter3.result | 143 + .../mysql-test/rocksdb/r/bloomfilter4.result | 32 + .../mysql-test/rocksdb/r/bloomfilter5.result | 85 + .../rocksdb/r/bloomfilter_bulk_load.result | 15 + .../mysql-test/rocksdb/r/bloomfilter_skip.result | 2042 ++ .../rocksdb/mysql-test/rocksdb/r/bulk_load.result | 81 + .../rocksdb/r/bulk_load_drop_table.result | 11 + .../mysql-test/rocksdb/r/bulk_load_errors.result | 100 + .../mysql-test/rocksdb/r/bulk_load_rev_cf.result | 81 + .../rocksdb/r/bulk_load_rev_cf_and_data.result | 81 + .../mysql-test/rocksdb/r/bulk_load_rev_data.result | 81 + .../mysql-test/rocksdb/r/bulk_load_sk.result | 229 + .../mysql-test/rocksdb/r/bulk_load_unsorted.result | 111 + .../rocksdb/r/bulk_load_unsorted_errors.result | 4 + .../rocksdb/r/bulk_load_unsorted_rev.result | 111 + .../rocksdb/r/bypass_select_basic.result | 693 + .../rocksdb/r/bypass_select_basic_bloom.result | 693 + .../mysql-test/rocksdb/r/bytes_written.result | 10 + .../mysql-test/rocksdb/r/cardinality.result | 104 + .../mysql-test/rocksdb/r/check_flags.result | 66 + .../rocksdb/r/check_ignore_unknown_options.result | 7 + .../mysql-test/rocksdb/r/check_table.result | 68 + .../rocksdb/mysql-test/rocksdb/r/checkpoint.result | 59 + .../mysql-test/rocksdb/r/checksum_table.result | 92 + .../rocksdb/r/checksum_table_live.result | 20 + .../mysql-test/rocksdb/r/col_opt_default.result | 20 + .../mysql-test/rocksdb/r/col_opt_not_null.result | 2613 +++ .../mysql-test/rocksdb/r/col_opt_null.result | 2268 ++ .../mysql-test/rocksdb/r/col_opt_unsigned.result | 749 + .../mysql-test/rocksdb/r/col_opt_zerofill.result | 731 + .../rocksdb/mysql-test/rocksdb/r/collation.result | 144 + .../rocksdb/r/collation_exception.result | 25 + 
.../rocksdb/mysql-test/rocksdb/r/com_rpc_tx.result | 21 + .../rocksdb/r/commit_in_the_middle_ddl.result | 14 + .../mysql-test/rocksdb/r/compact_deletes.result | 78 + .../mysql-test/rocksdb/r/compression_zstd.result | 3 + .../mysql-test/rocksdb/r/concurrent_alter.result | 12 + .../rocksdb/r/cons_snapshot_read_committed.result | 151 + .../rocksdb/r/cons_snapshot_repeatable_read.result | 144 + .../rocksdb/r/cons_snapshot_serializable.result | 24 + .../rocksdb/r/corrupted_data_reads_debug.result | 74 + .../rocksdb/r/covered_unpack_info_format.result | 73 + .../rocksdb/r/create_no_primary_key_table.result | 52 + .../mysql-test/rocksdb/r/create_table.result | 165 + .../mysql-test/rocksdb/r/ddl_high_priority.result | 1058 + .../rocksdb/mysql-test/rocksdb/r/deadlock.result | 37 + .../mysql-test/rocksdb/r/deadlock_stats.result | 14 + .../mysql-test/rocksdb/r/deadlock_tracking.result | 488 + storage/rocksdb/mysql-test/rocksdb/r/delete.result | 166 + .../mysql-test/rocksdb/r/delete_ignore.result | 59 + .../mysql-test/rocksdb/r/delete_quick.result | 24 + .../mysql-test/rocksdb/r/delete_with_keys.result | 38 + .../rocksdb/mysql-test/rocksdb/r/describe.result | 19 + .../mysql-test/rocksdb/r/drop_database.result | 6 + .../mysql-test/rocksdb/r/drop_index_inplace.result | 158 + .../rocksdb/mysql-test/rocksdb/r/drop_table.result | 79 + .../mysql-test/rocksdb/r/drop_table2.result | 66 + .../mysql-test/rocksdb/r/drop_table3.result | 25 + .../mysql-test/rocksdb/r/dup_key_update.result | 366 + .../mysql-test/rocksdb/r/duplicate_table.result | 15 + .../mysql-test/rocksdb/r/explicit_snapshot.result | 265 + .../mysql-test/rocksdb/r/fail_system_cf.result | 4 + .../rocksdb/r/fast_prefix_index_fetch.result | 80 + .../mysql-test/rocksdb/r/force_shutdown.result | 38 + .../mysql-test/rocksdb/r/foreign_key.result | 25 + .../mysql-test/rocksdb/r/gap_lock_issue254.result | 9 + .../rocksdb/r/gap_lock_raise_error.result | 504 + .../mysql-test/rocksdb/r/get_error_message.result | 8 + 
.../mysql-test/rocksdb/r/group_min_max.result | 3526 +++ .../mysql-test/rocksdb/r/ha_extra_keyread.result | 10 + .../mysql-test/rocksdb/r/handler_basic.result | 128 + .../rocksdb/mysql-test/rocksdb/r/hermitage.result | 648 + storage/rocksdb/mysql-test/rocksdb/r/i_s.result | 159 + .../rocksdb/mysql-test/rocksdb/r/i_s_ddl.result | 22 + .../mysql-test/rocksdb/r/i_s_deadlock.result | 216 + storage/rocksdb/mysql-test/rocksdb/r/index.result | 89 + .../mysql-test/rocksdb/r/index_file_map.result | 31 + .../rocksdb/r/index_key_block_size.result | 53 + .../rocksdb/r/index_merge_rocksdb.result | 49 + .../rocksdb/r/index_merge_rocksdb2.result | 1418 ++ .../mysql-test/rocksdb/r/index_primary.result | 71 + .../mysql-test/rocksdb/r/index_type_btree.result | 69 + .../mysql-test/rocksdb/r/index_type_hash.result | 69 + .../mysql-test/rocksdb/r/information_schema.result | 83 + .../rocksdb/r/innodb_i_s_tables_disabled.result | 326 + storage/rocksdb/mysql-test/rocksdb/r/insert.result | 202 + .../rocksdb/r/insert_optimized_config.result | 16 + .../mysql-test/rocksdb/r/insert_with_keys.result | 262 + .../rocksdb/mysql-test/rocksdb/r/issue100.result | 23 + .../mysql-test/rocksdb/r/issue100_delete.result | 17 + .../rocksdb/mysql-test/rocksdb/r/issue111.result | 37 + .../rocksdb/r/issue243_transactionStatus.result | 161 + .../rocksdb/mysql-test/rocksdb/r/issue255.result | 68 + .../rocksdb/mysql-test/rocksdb/r/issue290.result | 28 + .../rocksdb/mysql-test/rocksdb/r/issue314.result | 12 + .../rocksdb/mysql-test/rocksdb/r/issue495.result | 30 + .../rocksdb/mysql-test/rocksdb/r/issue884.result | 80 + .../rocksdb/mysql-test/rocksdb/r/issue896.result | 17 + .../rocksdb/mysql-test/rocksdb/r/issue900.result | 11 + .../mysql-test/rocksdb/r/iterator_bounds.result | 15 + storage/rocksdb/mysql-test/rocksdb/r/kill.result | 6 + .../rocksdb/r/level_read_committed.result | 111 + .../rocksdb/r/level_read_uncommitted.result | 116 + .../rocksdb/r/level_repeatable_read.result | 100 + 
.../mysql-test/rocksdb/r/level_serializable.result | 56 + .../rocksdb/mysql-test/rocksdb/r/loaddata.result | 239 + storage/rocksdb/mysql-test/rocksdb/r/lock.result | 121 + .../rocksdb/mysql-test/rocksdb/r/lock_info.result | 31 + .../rocksdb/r/lock_rows_not_exist.result | 40 + .../rocksdb/r/lock_wait_timeout_stats.result | 35 + .../mysql-test/rocksdb/r/locking_issues.result | 1 + .../rocksdb/r/locking_issues_case1_1_rc.result | 30 + .../rocksdb/r/locking_issues_case1_1_rr.result | 30 + .../rocksdb/r/locking_issues_case1_2_rc.result | 30 + .../rocksdb/r/locking_issues_case1_2_rr.result | 30 + .../rocksdb/r/locking_issues_case2_rc.result | 50 + .../rocksdb/r/locking_issues_case2_rc_lsr.result | 37 + .../rocksdb/r/locking_issues_case2_rr.result | 50 + .../rocksdb/r/locking_issues_case2_rr_lsr.result | 37 + .../rocksdb/r/locking_issues_case3_rc.result | 25 + .../rocksdb/r/locking_issues_case3_rr.result | 23 + .../rocksdb/r/locking_issues_case4_rc.result | 23 + .../rocksdb/r/locking_issues_case4_rr.result | 23 + .../rocksdb/r/locking_issues_case5_rc.result | 29 + .../rocksdb/r/locking_issues_case5_rr.result | 28 + .../rocksdb/r/locking_issues_case6_rc.result | 29 + .../rocksdb/r/locking_issues_case6_rr.result | 28 + .../rocksdb/r/locking_issues_case7_rc.result | 41 + .../rocksdb/r/locking_issues_case7_rc_lsr.result | 45 + .../rocksdb/r/locking_issues_case7_rr.result | 41 + .../rocksdb/r/locking_issues_case7_rr_lsr.result | 45 + .../rocksdb/r/mariadb_ignore_dirs.result | 9 + .../rocksdb/r/mariadb_misc_binlog.result | 33 + .../mysql-test/rocksdb/r/mariadb_plugin.result | 35 + .../mysql-test/rocksdb/r/mariadb_port_fixes.result | 119 + .../mysql-test/rocksdb/r/mariadb_port_rpl.result | 14 + .../mysql-test/rocksdb/r/max_open_files.result | 22 + storage/rocksdb/mysql-test/rocksdb/r/misc.result | 94 + .../rocksdb/r/multi_varchar_sk_lookup.result | 37 + .../rocksdb/r/mysqlbinlog_blind_replace.result | 128 + ...ysqlbinlog_gtid_skip_empty_trans_rocksdb.result | 143 + 
.../rocksdb/mysql-test/rocksdb/r/mysqldump.result | 246 + .../rocksdb/mysql-test/rocksdb/r/mysqldump2.result | 16 + .../mysql-test/rocksdb/r/native_procedure.result | 397 + .../mysql-test/rocksdb/r/negative_stats.result | 9 + .../mysql-test/rocksdb/r/no_merge_sort.result | 123 + .../r/optimize_myrocks_replace_into_base.result | 98 + .../r/optimize_myrocks_replace_into_lock.result | 46 + .../mysql-test/rocksdb/r/optimize_table.result | 77 + .../rocksdb/r/optimizer_loose_index_scans.result | 281 + .../rocksdb/mysql-test/rocksdb/r/partition.result | 689 + .../r/percona_nonflushing_analyze_debug.result | 19 + .../mysql-test/rocksdb/r/perf_context.result | 191 + .../mysql-test/rocksdb/r/persistent_cache.result | 11 + .../rocksdb/r/prefix_extractor_override.result | 82 + .../mysql-test/rocksdb/r/read_only_tx.result | 46 + .../mysql-test/rocksdb/r/records_in_range.result | 211 + .../mysql-test/rocksdb/r/repair_table.result | 37 + .../rocksdb/mysql-test/rocksdb/r/replace.result | 32 + .../rocksdb/mysql-test/rocksdb/r/rocksdb.result | 2643 +++ .../mysql-test/rocksdb/r/rocksdb_cf_options.result | 64 + .../rocksdb/r/rocksdb_cf_per_partition.result | 425 + .../mysql-test/rocksdb/r/rocksdb_cf_reverse.result | 120 + .../mysql-test/rocksdb/r/rocksdb_checksums.result | 129 + .../rocksdb/r/rocksdb_concurrent_delete.result | 671 + .../mysql-test/rocksdb/r/rocksdb_datadir.result | 2 + .../rocksdb/r/rocksdb_deadlock_detect_rc.result | 89 + .../rocksdb/r/rocksdb_deadlock_detect_rr.result | 89 + .../rocksdb/r/rocksdb_deadlock_stress_rc.result | 8 + .../rocksdb/r/rocksdb_deadlock_stress_rr.result | 8 + .../mysql-test/rocksdb/r/rocksdb_debug.result | 11 + .../mysql-test/rocksdb/r/rocksdb_icp.result | 257 + .../mysql-test/rocksdb/r/rocksdb_icp_rev.result | 223 + .../mysql-test/rocksdb/r/rocksdb_locks.result | 64 + .../mysql-test/rocksdb/r/rocksdb_parts.result | 152 + .../mysql-test/rocksdb/r/rocksdb_qcache.result | 45 + .../mysql-test/rocksdb/r/rocksdb_range.result | 293 + 
.../mysql-test/rocksdb/r/rocksdb_range2.result | 29 + .../rocksdb/r/rocksdb_read_free_rpl.result | 335 + .../rocksdb/r/rocksdb_read_free_rpl_stress.result | 35 + .../mysql-test/rocksdb/r/rocksdb_row_stats.result | 66 + .../rocksdb_table_stats_sampling_pct_change.result | 23 + .../rocksdb/r/rocksdb_timeout_rollback.result | 84 + .../mysql-test/rocksdb/r/rollback_savepoint.result | 29 + .../mysql-test/rocksdb/r/rpl_row_not_found.result | 56 + .../rocksdb/r/rpl_row_not_found_rc.result | 56 + .../mysql-test/rocksdb/r/rpl_row_rocksdb.result | 45 + .../mysql-test/rocksdb/r/rpl_row_stats.result | 98 + .../mysql-test/rocksdb/r/rpl_row_triggers.result | 286 + .../mysql-test/rocksdb/r/rpl_savepoint.result | 110 + .../mysql-test/rocksdb/r/rpl_statement.result | 57 + .../rocksdb/r/rpl_statement_not_found.result | 70 + .../mysql-test/rocksdb/r/rqg_examples.result | 3 + .../mysql-test/rocksdb/r/rqg_runtime.result | 30 + .../mysql-test/rocksdb/r/rqg_transactions.result | 11 + .../rocksdb/r/secondary_key_update_lock.result | 18 + storage/rocksdb/mysql-test/rocksdb/r/select.result | 380 + .../mysql-test/rocksdb/r/select_for_update.result | 35 + .../r/select_for_update_skip_locked_nowait.result | 28 + .../rocksdb/r/select_lock_in_share_mode.result | 37 + .../mysql-test/rocksdb/r/show_engine.result | 491 + .../mysql-test/rocksdb/r/show_table_status.result | 136 + .../rocksdb/mysql-test/rocksdb/r/shutdown.result | 9 + .../mysql-test/rocksdb/r/singledelete.result | 86 + .../rocksdb/r/skip_core_dump_on_error.result | 31 + .../rocksdb/r/skip_validate_tmp_table.result | 22 + .../mysql-test/rocksdb/r/slow_query_log.result | 10 + .../rocksdb/mysql-test/rocksdb/r/statistics.result | 73 + .../mysql-test/rocksdb/r/table_stats.result | 12 + .../rocksdb/mysql-test/rocksdb/r/tbl_opt_ai.result | 38 + .../rocksdb/r/tbl_opt_avg_row_length.result | 18 + .../mysql-test/rocksdb/r/tbl_opt_checksum.result | 18 + .../mysql-test/rocksdb/r/tbl_opt_connection.result | 26 + 
.../rocksdb/r/tbl_opt_data_index_dir.result | 41 + .../rocksdb/r/tbl_opt_delay_key_write.result | 18 + .../rocksdb/r/tbl_opt_insert_method.result | 18 + .../rocksdb/r/tbl_opt_key_block_size.result | 18 + .../mysql-test/rocksdb/r/tbl_opt_max_rows.result | 18 + .../mysql-test/rocksdb/r/tbl_opt_min_rows.result | 18 + .../mysql-test/rocksdb/r/tbl_opt_pack_keys.result | 18 + .../mysql-test/rocksdb/r/tbl_opt_password.result | 18 + .../mysql-test/rocksdb/r/tbl_opt_row_format.result | 18 + .../mysql-test/rocksdb/r/tbl_opt_union.result | 16 + .../mysql-test/rocksdb/r/tbl_standard_opts.result | 46 + storage/rocksdb/mysql-test/rocksdb/r/tmpdir.result | 32 + .../mysql-test/rocksdb/r/transaction.result | 977 + .../mysql-test/rocksdb/r/truncate_partition.result | 620 + .../mysql-test/rocksdb/r/truncate_table.result | 33 + .../mysql-test/rocksdb/r/truncate_table3.result | 23 + .../rocksdb/mysql-test/rocksdb/r/trx_info.result | 13 + .../mysql-test/rocksdb/r/trx_info_rpl.result | 16 + .../mysql-test/rocksdb/r/ttl_primary.result | 489 + .../rocksdb/r/ttl_primary_read_filtering.result | 283 + .../rocksdb/r/ttl_primary_with_partitions.result | 256 + .../mysql-test/rocksdb/r/ttl_rows_examined.result | 45 + .../mysql-test/rocksdb/r/ttl_secondary.result | 709 + .../rocksdb/r/ttl_secondary_read_filtering.result | 511 + ..._secondary_read_filtering_multiple_index.result | 82 + .../rocksdb/r/ttl_secondary_with_partitions.result | 389 + .../mysql-test/rocksdb/r/type_binary.result | 48 + .../rocksdb/r/type_binary_indexes.result | 80 + .../rocksdb/mysql-test/rocksdb/r/type_bit.result | 53 + .../mysql-test/rocksdb/r/type_bit_indexes.result | 58 + .../rocksdb/mysql-test/rocksdb/r/type_blob.result | 57 + .../mysql-test/rocksdb/r/type_blob_indexes.result | 198 + .../rocksdb/mysql-test/rocksdb/r/type_bool.result | 73 + .../rocksdb/mysql-test/rocksdb/r/type_char.result | 76 + .../mysql-test/rocksdb/r/type_char_indexes.result | 73 + .../rocksdb/r/type_char_indexes_collation.result | 91 + 
.../mysql-test/rocksdb/r/type_date_time.result | 56 + .../rocksdb/r/type_date_time_indexes.result | 119 + .../mysql-test/rocksdb/r/type_decimal.result | 105 + .../rocksdb/mysql-test/rocksdb/r/type_enum.result | 47 + .../mysql-test/rocksdb/r/type_enum_indexes.result | 69 + .../rocksdb/mysql-test/rocksdb/r/type_fixed.result | 131 + .../mysql-test/rocksdb/r/type_fixed_indexes.result | 129 + .../rocksdb/mysql-test/rocksdb/r/type_float.result | 314 + .../mysql-test/rocksdb/r/type_float_indexes.result | 189 + .../rocksdb/mysql-test/rocksdb/r/type_int.result | 212 + .../mysql-test/rocksdb/r/type_int_indexes.result | 99 + .../rocksdb/mysql-test/rocksdb/r/type_set.result | 49 + .../mysql-test/rocksdb/r/type_set_indexes.result | 115 + .../rocksdb/mysql-test/rocksdb/r/type_text.result | 57 + .../mysql-test/rocksdb/r/type_text_indexes.result | 181 + .../mysql-test/rocksdb/r/type_varbinary.result | 93 + .../mysql-test/rocksdb/r/type_varchar.result | 775 + .../mysql-test/rocksdb/r/unique_check.result | 117 + .../rocksdb/mysql-test/rocksdb/r/unique_sec.result | 221 + .../mysql-test/rocksdb/r/unique_sec_rev_cf.result | 177 + .../rocksdb/r/unsupported_tx_isolations.result | 18 + storage/rocksdb/mysql-test/rocksdb/r/update.result | 121 + .../mysql-test/rocksdb/r/update_ignore.result | 57 + .../mysql-test/rocksdb/r/update_multi.result | 691 + .../mysql-test/rocksdb/r/update_with_keys.result | 38 + .../use_direct_io_for_flush_and_compaction.result | 18 + .../mysql-test/rocksdb/r/use_direct_reads.result | 18 + .../rocksdb/r/use_direct_reads_writes.result | 22 + .../mysql-test/rocksdb/r/validate_datadic.result | 9 + .../mysql-test/rocksdb/r/varbinary_format.result | 260 + .../rocksdb/mysql-test/rocksdb/r/write_sync.result | 30 + storage/rocksdb/mysql-test/rocksdb/r/xa.result | 70 + .../rocksdb/mysql-test/rocksdb/slow_query_log.awk | 25 + storage/rocksdb/mysql-test/rocksdb/suite.opt | 1 + storage/rocksdb/mysql-test/rocksdb/suite.pm | 28 + storage/rocksdb/mysql-test/rocksdb/t/1st.test | 
36 + .../rocksdb/t/2pc_group_commit-master.opt | 1 + .../mysql-test/rocksdb/t/2pc_group_commit.test | 168 + .../mysql-test/rocksdb/t/add_index_inplace.test | 417 + .../t/add_index_inplace_cardinality-master.opt | 1 + .../rocksdb/t/add_index_inplace_cardinality.test | 44 + .../rocksdb/t/add_index_inplace_crash.test | 118 + .../rocksdb/t/add_index_inplace_sstfilewriter.test | 113 + .../rocksdb/t/add_unique_index_inplace.test | 101 + .../rocksdb/t/allow_no_pk_concurrent_insert.test | 22 + .../mysql-test/rocksdb/t/allow_no_primary_key.test | 126 + .../rocksdb/t/allow_no_primary_key_with_sk.test | 149 + .../t/allow_to_start_after_corruption-master.opt | 1 + .../rocksdb/t/allow_to_start_after_corruption.test | 77 + .../rocksdb/mysql-test/rocksdb/t/alter_table.test | 94 + .../mysql-test/rocksdb/t/analyze_table.test | 57 + .../mysql-test/rocksdb/t/apply_changes_iter.test | 44 + .../mysql-test/rocksdb/t/autoinc_crash_safe.cnf | 8 + .../mysql-test/rocksdb/t/autoinc_crash_safe.test | 9 + .../rocksdb/t/autoinc_crash_safe_partition.cnf | 8 + .../rocksdb/t/autoinc_crash_safe_partition.test | 10 + .../mysql-test/rocksdb/t/autoinc_debug-master.opt | 1 + .../mysql-test/rocksdb/t/autoinc_debug.test | 121 + .../mysql-test/rocksdb/t/autoinc_secondary.test | 16 + .../rocksdb/mysql-test/rocksdb/t/autoinc_vars.test | 171 + .../mysql-test/rocksdb/t/autoinc_vars_thread.test | 65 + .../rocksdb/t/autoinc_vars_thread_2.test | 142 + .../mysql-test/rocksdb/t/binlog_rotate_crash.test | 31 + .../mysql-test/rocksdb/t/blind_delete_rc.cnf | 11 + .../mysql-test/rocksdb/t/blind_delete_rc.test | 3 + .../mysql-test/rocksdb/t/blind_delete_rr.cnf | 11 + .../mysql-test/rocksdb/t/blind_delete_rr.test | 3 + .../rocksdb/t/blind_delete_without_tx_api.inc | 132 + .../mysql-test/rocksdb/t/bloomfilter-master.opt | 2 + .../rocksdb/mysql-test/rocksdb/t/bloomfilter.inc | 78 + .../rocksdb/mysql-test/rocksdb/t/bloomfilter.test | 1 + .../mysql-test/rocksdb/t/bloomfilter2-master.opt | 1 + 
.../rocksdb/mysql-test/rocksdb/t/bloomfilter2.test | 103 + .../mysql-test/rocksdb/t/bloomfilter3-master.opt | 4 + .../rocksdb/mysql-test/rocksdb/t/bloomfilter3.test | 136 + .../mysql-test/rocksdb/t/bloomfilter4-master.opt | 1 + .../rocksdb/mysql-test/rocksdb/t/bloomfilter4.test | 52 + .../mysql-test/rocksdb/t/bloomfilter5-master.opt | 3 + .../rocksdb/mysql-test/rocksdb/t/bloomfilter5.test | 86 + .../rocksdb/t/bloomfilter_bulk_load-master.opt | 2 + .../rocksdb/t/bloomfilter_bulk_load.test | 35 + .../rocksdb/t/bloomfilter_load_select.inc | 190 + .../rocksdb/t/bloomfilter_skip-master.opt | 3 + .../mysql-test/rocksdb/t/bloomfilter_skip.test | 1 + .../mysql-test/rocksdb/t/bloomfilter_table_def.inc | 33 + .../rocksdb/mysql-test/rocksdb/t/bulk_load.test | 11 + .../mysql-test/rocksdb/t/bulk_load_drop_table.test | 19 + .../mysql-test/rocksdb/t/bulk_load_errors.test | 168 + .../mysql-test/rocksdb/t/bulk_load_rev_cf.test | 10 + .../rocksdb/t/bulk_load_rev_cf_and_data.test | 10 + .../mysql-test/rocksdb/t/bulk_load_rev_data.test | 10 + .../rocksdb/mysql-test/rocksdb/t/bulk_load_sk.test | 119 + .../mysql-test/rocksdb/t/bulk_load_unsorted.test | 6 + .../rocksdb/t/bulk_load_unsorted_errors.test | 8 + .../rocksdb/t/bulk_load_unsorted_rev.test | 5 + .../mysql-test/rocksdb/t/bypass_select_basic.inc | 213 + .../mysql-test/rocksdb/t/bypass_select_basic.test | 3 + .../rocksdb/t/bypass_select_basic_bloom-master.opt | 3 + .../rocksdb/t/bypass_select_basic_bloom.test | 3 + .../mysql-test/rocksdb/t/bytes_written.test | 22 + .../mysql-test/rocksdb/t/cardinality-master.opt | 3 + .../rocksdb/mysql-test/rocksdb/t/cardinality.test | 119 + .../rocksdb/mysql-test/rocksdb/t/check_flags.test | 117 + .../rocksdb/t/check_ignore_unknown_options.test | 56 + .../mysql-test/rocksdb/t/check_log_for_xa.py | 31 + .../rocksdb/mysql-test/rocksdb/t/check_table.inc | 54 + .../rocksdb/mysql-test/rocksdb/t/check_table.test | 12 + .../rocksdb/mysql-test/rocksdb/t/checkpoint.test | 107 + 
.../mysql-test/rocksdb/t/checksum_table.test | 84 + .../mysql-test/rocksdb/t/checksum_table_live.test | 24 + .../rocksdb/mysql-test/rocksdb/t/col_not_null.inc | 55 + .../rocksdb/t/col_not_null_timestamp.inc | 70 + storage/rocksdb/mysql-test/rocksdb/t/col_null.inc | 34 + .../mysql-test/rocksdb/t/col_opt_default.test | 27 + .../mysql-test/rocksdb/t/col_opt_not_null.test | 229 + .../rocksdb/mysql-test/rocksdb/t/col_opt_null.test | 220 + .../mysql-test/rocksdb/t/col_opt_unsigned.test | 74 + .../mysql-test/rocksdb/t/col_opt_zerofill.test | 67 + .../mysql-test/rocksdb/t/collation-master.opt | 1 + .../rocksdb/mysql-test/rocksdb/t/collation.test | 211 + .../rocksdb/t/collation_exception-master.opt | 2 + .../mysql-test/rocksdb/t/collation_exception.test | 29 + .../rocksdb/mysql-test/rocksdb/t/com_rpc_tx.cnf | 4 + .../rocksdb/mysql-test/rocksdb/t/com_rpc_tx.test | 90 + .../rocksdb/t/commit_in_the_middle_ddl.test | 27 + .../rocksdb/t/compact_deletes-master.opt | 3 + .../mysql-test/rocksdb/t/compact_deletes.test | 88 + .../mysql-test/rocksdb/t/compact_deletes_test.inc | 72 + .../mysql-test/rocksdb/t/compression_zstd.test | 14 + .../mysql-test/rocksdb/t/concurrent_alter.test | 39 + .../rocksdb/t/cons_snapshot_read_committed.opt | 1 + .../rocksdb/t/cons_snapshot_read_committed.test | 6 + .../rocksdb/t/cons_snapshot_repeatable_read.opt | 1 + .../rocksdb/t/cons_snapshot_repeatable_read.test | 6 + .../rocksdb/t/cons_snapshot_serializable.opt | 1 + .../rocksdb/t/cons_snapshot_serializable.test | 6 + .../mysql-test/rocksdb/t/consistent_snapshot.inc | 136 + .../rocksdb/t/corrupted_data_reads_debug.test | 80 + .../rocksdb/t/covered_unpack_info_format.test | 79 + .../t/create_no_primary_key_table-master.opt | 1 + .../rocksdb/t/create_no_primary_key_table.test | 63 + .../rocksdb/mysql-test/rocksdb/t/create_table.test | 192 + .../mysql-test/rocksdb/t/ddl_high_priority.test | 18 + storage/rocksdb/mysql-test/rocksdb/t/deadlock.test | 44 + .../mysql-test/rocksdb/t/deadlock_stats.test | 3 + 
.../mysql-test/rocksdb/t/deadlock_tracking.test | 185 + storage/rocksdb/mysql-test/rocksdb/t/delete.test | 101 + .../mysql-test/rocksdb/t/delete_ignore.test | 37 + .../rocksdb/mysql-test/rocksdb/t/delete_quick.test | 32 + .../mysql-test/rocksdb/t/delete_with_keys.test | 39 + storage/rocksdb/mysql-test/rocksdb/t/describe.test | 24 + storage/rocksdb/mysql-test/rocksdb/t/disabled.def | 98 + .../mysql-test/rocksdb/t/drop_database.test | 11 + .../mysql-test/rocksdb/t/drop_index_inplace.test | 116 + .../mysql-test/rocksdb/t/drop_stats_procedure.inc | 3 + .../mysql-test/rocksdb/t/drop_table-master.opt | 3 + .../rocksdb/mysql-test/rocksdb/t/drop_table.test | 145 + .../rocksdb/mysql-test/rocksdb/t/drop_table2.test | 131 + .../mysql-test/rocksdb/t/drop_table3-master.opt | 2 + .../rocksdb/mysql-test/rocksdb/t/drop_table3.inc | 52 + .../rocksdb/mysql-test/rocksdb/t/drop_table3.test | 5 + .../rocksdb/t/drop_table3_repopulate_table.inc | 15 + .../rocksdb/t/drop_table_repopulate_table.inc | 15 + .../mysql-test/rocksdb/t/drop_table_sync.inc | 6 + .../mysql-test/rocksdb/t/dup_key_update.test | 45 + .../mysql-test/rocksdb/t/duplicate_table.test | 16 + .../rocksdb/t/explicit_snapshot-master.opt | 1 + .../mysql-test/rocksdb/t/explicit_snapshot.test | 263 + .../mysql-test/rocksdb/t/fail_system_cf.test | 17 + .../rocksdb/t/fast_prefix_index_fetch.test | 120 + .../mysql-test/rocksdb/t/force_shutdown.test | 97 + .../rocksdb/mysql-test/rocksdb/t/foreign_key.test | 47 + .../rocksdb/t/gap_lock_issue254-master.opt | 1 + .../mysql-test/rocksdb/t/gap_lock_issue254.test | 14 + .../mysql-test/rocksdb/t/gap_lock_raise_error.test | 37 + .../mysql-test/rocksdb/t/get_error_message.test | 27 + .../mysql-test/rocksdb/t/group_min_max-master.opt | 1 + .../mysql-test/rocksdb/t/group_min_max.test | 9 + .../mysql-test/rocksdb/t/ha_extra_keyread.test | 15 + .../mysql-test/rocksdb/t/handler_basic.test | 53 + storage/rocksdb/mysql-test/rocksdb/t/hermitage.inc | 257 + 
.../rocksdb/mysql-test/rocksdb/t/hermitage.test | 10 + .../mysql-test/rocksdb/t/hermitage_init.inc | 8 + storage/rocksdb/mysql-test/rocksdb/t/i_s.test | 21 + storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test | 29 + .../rocksdb/mysql-test/rocksdb/t/i_s_deadlock.test | 158 + storage/rocksdb/mysql-test/rocksdb/t/index.inc | 155 + storage/rocksdb/mysql-test/rocksdb/t/index.test | 44 + .../mysql-test/rocksdb/t/index_file_map-master.opt | 1 + .../mysql-test/rocksdb/t/index_file_map.test | 54 + .../mysql-test/rocksdb/t/index_key_block_size.test | 70 + .../rocksdb/t/index_merge_rocksdb-master.opt | 1 + .../mysql-test/rocksdb/t/index_merge_rocksdb.test | 110 + .../rocksdb/t/index_merge_rocksdb2-master.opt | 1 + .../mysql-test/rocksdb/t/index_merge_rocksdb2.test | 72 + .../mysql-test/rocksdb/t/index_primary.test | 96 + .../mysql-test/rocksdb/t/index_type_btree.test | 12 + .../mysql-test/rocksdb/t/index_type_hash.test | 12 + .../rocksdb/t/information_schema-master.opt | 1 + .../mysql-test/rocksdb/t/information_schema.test | 89 + .../mysql-test/rocksdb/t/init_stats_procedure.inc | 40 + .../t/innodb_i_s_tables_disabled-master.opt | 30 + .../rocksdb/t/innodb_i_s_tables_disabled.test | 43 + storage/rocksdb/mysql-test/rocksdb/t/insert.test | 99 + .../rocksdb/t/insert_optimized_config.test | 51 + .../mysql-test/rocksdb/t/insert_with_keys.test | 169 + storage/rocksdb/mysql-test/rocksdb/t/issue100.test | 23 + .../rocksdb/t/issue100_delete-master.opt | 1 + .../mysql-test/rocksdb/t/issue100_delete.test | 19 + storage/rocksdb/mysql-test/rocksdb/t/issue111.test | 38 + .../rocksdb/t/issue243_transactionStatus.test | 80 + storage/rocksdb/mysql-test/rocksdb/t/issue255.test | 52 + storage/rocksdb/mysql-test/rocksdb/t/issue290.test | 40 + storage/rocksdb/mysql-test/rocksdb/t/issue314.test | 16 + storage/rocksdb/mysql-test/rocksdb/t/issue495.test | 34 + storage/rocksdb/mysql-test/rocksdb/t/issue884.test | 43 + storage/rocksdb/mysql-test/rocksdb/t/issue896.test | 17 + 
storage/rocksdb/mysql-test/rocksdb/t/issue900.test | 13 + .../rocksdb/t/iterator_bounds-master.opt | 2 + .../mysql-test/rocksdb/t/iterator_bounds.test | 29 + storage/rocksdb/mysql-test/rocksdb/t/kill.test | 9 + .../mysql-test/rocksdb/t/level_read_committed.test | 6 + .../rocksdb/t/level_read_uncommitted.opt | 1 + .../rocksdb/t/level_read_uncommitted.test | 6 + .../rocksdb/t/level_repeatable_read.test | 5 + .../mysql-test/rocksdb/t/level_serializable.test | 5 + storage/rocksdb/mysql-test/rocksdb/t/loaddata.inc | 117 + storage/rocksdb/mysql-test/rocksdb/t/loaddata.test | 7 + storage/rocksdb/mysql-test/rocksdb/t/lock.test | 224 + .../rocksdb/mysql-test/rocksdb/t/lock_info.test | 31 + .../mysql-test/rocksdb/t/lock_rows_not_exist.test | 110 + .../rocksdb/t/lock_wait_timeout_stats.test | 38 + .../mysql-test/rocksdb/t/locking_issues.test | 3 + .../rocksdb/t/locking_issues_case1_1_rc.test | 4 + .../rocksdb/t/locking_issues_case1_1_rr.test | 4 + .../rocksdb/t/locking_issues_case1_2_rc.test | 4 + .../rocksdb/t/locking_issues_case1_2_rr.test | 4 + .../rocksdb/t/locking_issues_case2_rc.test | 5 + .../rocksdb/t/locking_issues_case2_rc_lsr.test | 5 + .../rocksdb/t/locking_issues_case2_rr.test | 5 + .../rocksdb/t/locking_issues_case2_rr_lsr.test | 5 + .../rocksdb/t/locking_issues_case3_rc.test | 4 + .../rocksdb/t/locking_issues_case3_rr.test | 4 + .../rocksdb/t/locking_issues_case4_rc.test | 4 + .../rocksdb/t/locking_issues_case4_rr.test | 4 + .../rocksdb/t/locking_issues_case5_rc.test | 4 + .../rocksdb/t/locking_issues_case5_rr.test | 4 + .../rocksdb/t/locking_issues_case6_rc.test | 4 + .../rocksdb/t/locking_issues_case6_rr.test | 4 + .../rocksdb/t/locking_issues_case7_rc.test | 5 + .../rocksdb/t/locking_issues_case7_rc_lsr.test | 5 + .../rocksdb/t/locking_issues_case7_rr.test | 5 + .../rocksdb/t/locking_issues_case7_rr_lsr.test | 5 + .../mysql-test/rocksdb/t/mariadb_ignore_dirs.test | 17 + .../rocksdb/t/mariadb_misc_binlog-master.opt | 1 + 
.../mysql-test/rocksdb/t/mariadb_misc_binlog.test | 40 + .../mysql-test/rocksdb/t/mariadb_plugin-master.opt | 1 + .../mysql-test/rocksdb/t/mariadb_plugin.test | 59 + .../mysql-test/rocksdb/t/mariadb_port_fixes.test | 114 + .../mysql-test/rocksdb/t/mariadb_port_rpl.test | 14 + .../mysql-test/rocksdb/t/max_open_files.test | 53 + storage/rocksdb/mysql-test/rocksdb/t/misc.test | 45 + .../rocksdb/t/multi_varchar_sk_lookup.test | 49 + .../rocksdb/t/mysqlbinlog_blind_replace.test | 62 + ...binlog_gtid_skip_empty_trans_rocksdb-master.opt | 1 + .../mysqlbinlog_gtid_skip_empty_trans_rocksdb.test | 16 + .../mysql-test/rocksdb/t/mysqldump-master.opt | 1 + .../rocksdb/mysql-test/rocksdb/t/mysqldump.test | 67 + .../mysql-test/rocksdb/t/mysqldump2-master.opt | 1 + .../rocksdb/mysql-test/rocksdb/t/mysqldump2.test | 43 + .../rocksdb/t/native_procedure-master.opt | 1 + .../mysql-test/rocksdb/t/native_procedure.test | 2 + .../mysql-test/rocksdb/t/negative_stats.test | 26 + .../mysql-test/rocksdb/t/no_merge_sort.test | 32 + .../rocksdb/t/no_primary_key_basic_ops.inc | 65 + .../t/optimize_myrocks_replace_into_base.test | 96 + .../t/optimize_myrocks_replace_into_lock.test | 88 + .../mysql-test/rocksdb/t/optimize_table-master.opt | 1 + .../mysql-test/rocksdb/t/optimize_table.inc | 20 + .../mysql-test/rocksdb/t/optimize_table.test | 81 + .../rocksdb/t/optimizer_loose_index_scans.test | 4 + .../rocksdb/mysql-test/rocksdb/t/partition.test | 762 + .../t/percona_nonflushing_analyze_debug.test | 11 + .../rocksdb/mysql-test/rocksdb/t/perf_context.test | 96 + .../mysql-test/rocksdb/t/persistent_cache.test | 41 + .../rocksdb/t/prefix_extractor_override-master.opt | 1 + .../rocksdb/t/prefix_extractor_override.test | 96 + .../mysql-test/rocksdb/t/read_only_tx-master.opt | 1 + .../rocksdb/mysql-test/rocksdb/t/read_only_tx.test | 70 + .../rocksdb/t/records_in_range-master.opt | 3 + .../mysql-test/rocksdb/t/records_in_range.test | 146 + .../rocksdb/mysql-test/rocksdb/t/repair_table.inc | 38 + 
.../rocksdb/mysql-test/rocksdb/t/repair_table.test | 8 + storage/rocksdb/mysql-test/rocksdb/t/replace.test | 54 + .../mysql-test/rocksdb/t/rocksdb-master.opt | 1 + storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test | 1974 ++ .../rocksdb/t/rocksdb_cf_options-master.opt | 1 + .../rocksdb/t/rocksdb_cf_options-master.sh | 5 + .../mysql-test/rocksdb/t/rocksdb_cf_options.test | 75 + .../rocksdb/t/rocksdb_cf_per_partition.test | 513 + .../rocksdb/t/rocksdb_cf_reverse-master.opt | 1 + .../mysql-test/rocksdb/t/rocksdb_cf_reverse.test | 71 + .../rocksdb/t/rocksdb_checksums-master.opt | 3 + .../mysql-test/rocksdb/t/rocksdb_checksums.test | 152 + .../rocksdb/t/rocksdb_concurrent_delete.inc | 109 + .../rocksdb/t/rocksdb_concurrent_delete.test | 38 + .../rocksdb/t/rocksdb_concurrent_delete_main.inc | 30 + .../rocksdb/t/rocksdb_concurrent_delete_range.inc | 85 + .../rocksdb/t/rocksdb_concurrent_delete_sk.inc | 82 + .../rocksdb/t/rocksdb_concurrent_insert.py | 95 + .../mysql-test/rocksdb/t/rocksdb_datadir.test | 33 + .../rocksdb/t/rocksdb_deadlock_detect.inc | 123 + .../t/rocksdb_deadlock_detect_rc-master.opt | 1 + .../rocksdb/t/rocksdb_deadlock_detect_rc.test | 1 + .../rocksdb/t/rocksdb_deadlock_detect_rr.test | 1 + .../rocksdb/t/rocksdb_deadlock_stress.inc | 18 + .../rocksdb/t/rocksdb_deadlock_stress.py | 94 + .../t/rocksdb_deadlock_stress_rc-master.opt | 1 + .../rocksdb/t/rocksdb_deadlock_stress_rc.test | 1 + .../rocksdb/t/rocksdb_deadlock_stress_rr.test | 1 + .../mysql-test/rocksdb/t/rocksdb_debug.test | 14 + .../mysql-test/rocksdb/t/rocksdb_icp-master.opt | 1 + .../rocksdb/mysql-test/rocksdb/t/rocksdb_icp.test | 44 + .../rocksdb/t/rocksdb_icp_rev-master.opt | 1 + .../mysql-test/rocksdb/t/rocksdb_icp_rev.test | 7 + .../mysql-test/rocksdb/t/rocksdb_locks-master.opt | 1 + .../mysql-test/rocksdb/t/rocksdb_locks.test | 94 + .../mysql-test/rocksdb/t/rocksdb_parts-master.opt | 1 + .../mysql-test/rocksdb/t/rocksdb_parts.test | 160 + .../mysql-test/rocksdb/t/rocksdb_qcache-master.opt | 
1 + .../mysql-test/rocksdb/t/rocksdb_qcache.test | 43 + .../mysql-test/rocksdb/t/rocksdb_range-master.opt | 1 + .../mysql-test/rocksdb/t/rocksdb_range.test | 196 + .../mysql-test/rocksdb/t/rocksdb_range2.test | 33 + .../mysql-test/rocksdb/t/rocksdb_read_free_rpl.cnf | 16 + .../rocksdb/t/rocksdb_read_free_rpl.test | 414 + .../rocksdb/t/rocksdb_read_free_rpl_stress.cnf | 17 + .../rocksdb/t/rocksdb_read_free_rpl_stress.inc | 69 + .../rocksdb/t/rocksdb_read_free_rpl_stress.test | 22 + .../mysql-test/rocksdb/t/rocksdb_row_stats.test | 57 + .../t/rocksdb_table_stats_sampling_pct_change.test | 80 + .../rocksdb/t/rocksdb_timeout_rollback-master.opt | 1 + .../rocksdb/t/rocksdb_timeout_rollback.test | 78 + .../mysql-test/rocksdb/t/rollback_savepoint.test | 33 + .../mysql-test/rocksdb/t/rpl_row_not_found.cnf | 9 + .../mysql-test/rocksdb/t/rpl_row_not_found.inc | 98 + .../mysql-test/rocksdb/t/rpl_row_not_found.test | 4 + .../mysql-test/rocksdb/t/rpl_row_not_found_rc.cnf | 11 + .../mysql-test/rocksdb/t/rpl_row_not_found_rc.test | 4 + .../mysql-test/rocksdb/t/rpl_row_rocksdb.cnf | 1 + .../mysql-test/rocksdb/t/rpl_row_rocksdb.test | 48 + .../mysql-test/rocksdb/t/rpl_row_stats-slave.opt | 1 + .../rocksdb/mysql-test/rocksdb/t/rpl_row_stats.cnf | 1 + .../mysql-test/rocksdb/t/rpl_row_stats.test | 47 + .../mysql-test/rocksdb/t/rpl_row_triggers.cnf | 19 + .../mysql-test/rocksdb/t/rpl_row_triggers.test | 262 + .../rocksdb/mysql-test/rocksdb/t/rpl_savepoint.cnf | 1 + .../mysql-test/rocksdb/t/rpl_savepoint.test | 91 + .../rocksdb/mysql-test/rocksdb/t/rpl_statement.cnf | 7 + .../mysql-test/rocksdb/t/rpl_statement.test | 59 + .../rocksdb/t/rpl_statement_not_found.cnf | 9 + .../rocksdb/t/rpl_statement_not_found.test | 3 + storage/rocksdb/mysql-test/rocksdb/t/rqg.inc | 44 + .../mysql-test/rocksdb/t/rqg_examples-master.opt | 1 + .../rocksdb/mysql-test/rocksdb/t/rqg_examples.test | 12 + .../mysql-test/rocksdb/t/rqg_runtime-master.opt | 1 + .../rocksdb/mysql-test/rocksdb/t/rqg_runtime.test | 58 
+ .../rocksdb/t/rqg_transactions-master.opt | 1 + .../mysql-test/rocksdb/t/rqg_transactions.test | 14 + storage/rocksdb/mysql-test/rocksdb/t/se-innodb.out | 1 + .../rocksdb/t/secondary_key_update_lock.test | 26 + storage/rocksdb/mysql-test/rocksdb/t/select.test | 202 + .../mysql-test/rocksdb/t/select_for_update.test | 55 + .../t/select_for_update_skip_locked_nowait.test | 48 + .../rocksdb/t/select_lock_in_share_mode.test | 54 + .../mysql-test/rocksdb/t/set_checkpoint.inc | 30 + .../rocksdb/mysql-test/rocksdb/t/show_engine.test | 103 + .../rocksdb/t/show_table_status-master.opt | 3 + .../mysql-test/rocksdb/t/show_table_status.test | 175 + .../mysql-test/rocksdb/t/shutdown-master.opt | 1 + storage/rocksdb/mysql-test/rocksdb/t/shutdown.test | 36 + .../mysql-test/rocksdb/t/singledelete-master.opt | 1 + .../rocksdb/mysql-test/rocksdb/t/singledelete.test | 105 + .../rocksdb/t/skip_core_dump_on_error-master.opt | 1 + .../rocksdb/t/skip_core_dump_on_error.test | 53 + .../rocksdb/t/skip_validate_tmp_table.test | 39 + .../mysql-test/rocksdb/t/slow_query_log-master.opt | 1 + .../mysql-test/rocksdb/t/slow_query_log.test | 37 + .../mysql-test/rocksdb/t/statistics-master.opt | 3 + .../rocksdb/mysql-test/rocksdb/t/statistics.test | 82 + .../mysql-test/rocksdb/t/table_stats-master.opt | 1 + .../rocksdb/mysql-test/rocksdb/t/table_stats.test | 29 + .../rocksdb/mysql-test/rocksdb/t/tbl_opt_ai.test | 29 + .../rocksdb/t/tbl_opt_avg_row_length.test | 23 + .../mysql-test/rocksdb/t/tbl_opt_checksum.test | 19 + .../mysql-test/rocksdb/t/tbl_opt_connection.test | 32 + .../rocksdb/t/tbl_opt_data_index_dir.test | 60 + .../rocksdb/t/tbl_opt_delay_key_write.test | 23 + .../rocksdb/t/tbl_opt_insert_method.test | 23 + .../rocksdb/t/tbl_opt_key_block_size.test | 23 + .../mysql-test/rocksdb/t/tbl_opt_max_rows.test | 23 + .../mysql-test/rocksdb/t/tbl_opt_min_rows.test | 23 + .../mysql-test/rocksdb/t/tbl_opt_pack_keys.test | 23 + .../mysql-test/rocksdb/t/tbl_opt_password.test | 27 + 
.../mysql-test/rocksdb/t/tbl_opt_row_format.test | 23 + .../mysql-test/rocksdb/t/tbl_opt_union.test | 28 + .../mysql-test/rocksdb/t/tbl_standard_opts.test | 42 + storage/rocksdb/mysql-test/rocksdb/t/tmpdir.test | 35 + .../rocksdb/mysql-test/rocksdb/t/transaction.test | 158 + .../mysql-test/rocksdb/t/transaction_isolation.inc | 150 + .../mysql-test/rocksdb/t/transaction_select.inc | 14 + .../mysql-test/rocksdb/t/truncate_partition.inc | 102 + .../mysql-test/rocksdb/t/truncate_partition.test | 83 + .../mysql-test/rocksdb/t/truncate_table.test | 74 + .../rocksdb/t/truncate_table3-master.opt | 2 + .../mysql-test/rocksdb/t/truncate_table3.test | 5 + storage/rocksdb/mysql-test/rocksdb/t/trx_info.test | 17 + .../rocksdb/mysql-test/rocksdb/t/trx_info_rpl.cnf | 11 + .../rocksdb/mysql-test/rocksdb/t/trx_info_rpl.test | 44 + .../mysql-test/rocksdb/t/ttl_primary-master.opt | 2 + .../rocksdb/mysql-test/rocksdb/t/ttl_primary.test | 545 + .../t/ttl_primary_read_filtering-master.opt | 1 + .../rocksdb/t/ttl_primary_read_filtering.test | 388 + .../t/ttl_primary_with_partitions-master.opt | 2 + .../rocksdb/t/ttl_primary_with_partitions.test | 254 + .../mysql-test/rocksdb/t/ttl_rows_examined.test | 57 + .../mysql-test/rocksdb/t/ttl_secondary-master.opt | 2 + .../mysql-test/rocksdb/t/ttl_secondary.test | 780 + .../t/ttl_secondary_read_filtering-master.opt | 1 + .../rocksdb/t/ttl_secondary_read_filtering.test | 503 + ...tl_secondary_read_filtering_multiple_index.test | 87 + .../t/ttl_secondary_with_partitions-master.opt | 1 + .../rocksdb/t/ttl_secondary_with_partitions.test | 300 + .../rocksdb/mysql-test/rocksdb/t/type_binary.inc | 45 + .../rocksdb/mysql-test/rocksdb/t/type_binary.test | 8 + .../rocksdb/t/type_binary_indexes-master.opt | 1 + .../mysql-test/rocksdb/t/type_binary_indexes.test | 99 + storage/rocksdb/mysql-test/rocksdb/t/type_bit.inc | 53 + storage/rocksdb/mysql-test/rocksdb/t/type_bit.test | 8 + .../rocksdb/t/type_bit_indexes-master.opt | 1 + 
.../mysql-test/rocksdb/t/type_bit_indexes.test | 113 + storage/rocksdb/mysql-test/rocksdb/t/type_blob.inc | 49 + .../rocksdb/mysql-test/rocksdb/t/type_blob.test | 8 + .../rocksdb/t/type_blob_indexes-master.opt | 1 + .../mysql-test/rocksdb/t/type_blob_indexes.test | 176 + storage/rocksdb/mysql-test/rocksdb/t/type_bool.inc | 64 + .../rocksdb/mysql-test/rocksdb/t/type_bool.test | 8 + storage/rocksdb/mysql-test/rocksdb/t/type_char.inc | 45 + .../rocksdb/mysql-test/rocksdb/t/type_char.test | 19 + .../rocksdb/t/type_char_indexes-master.opt | 1 + .../mysql-test/rocksdb/t/type_char_indexes.test | 107 + .../t/type_char_indexes_collation-master.opt | 1 + .../rocksdb/t/type_char_indexes_collation.test | 126 + .../mysql-test/rocksdb/t/type_date_time.inc | 47 + .../mysql-test/rocksdb/t/type_date_time.test | 9 + .../rocksdb/t/type_date_time_indexes-master.opt | 1 + .../rocksdb/t/type_date_time_indexes.test | 157 + .../mysql-test/rocksdb/t/type_decimal-master.opt | 1 + .../rocksdb/mysql-test/rocksdb/t/type_decimal.test | 88 + storage/rocksdb/mysql-test/rocksdb/t/type_enum.inc | 50 + .../rocksdb/mysql-test/rocksdb/t/type_enum.test | 8 + .../rocksdb/t/type_enum_indexes-master.opt | 1 + .../mysql-test/rocksdb/t/type_enum_indexes.test | 93 + .../rocksdb/mysql-test/rocksdb/t/type_fixed.inc | 85 + .../rocksdb/mysql-test/rocksdb/t/type_fixed.test | 8 + .../rocksdb/t/type_fixed_indexes-master.opt | 1 + .../mysql-test/rocksdb/t/type_fixed_indexes.test | 107 + .../rocksdb/mysql-test/rocksdb/t/type_float.inc | 121 + .../rocksdb/mysql-test/rocksdb/t/type_float.test | 8 + .../rocksdb/t/type_float_indexes-master.opt | 1 + .../mysql-test/rocksdb/t/type_float_indexes.test | 175 + storage/rocksdb/mysql-test/rocksdb/t/type_int.inc | 68 + storage/rocksdb/mysql-test/rocksdb/t/type_int.test | 8 + .../rocksdb/t/type_int_indexes-master.opt | 1 + .../mysql-test/rocksdb/t/type_int_indexes.test | 75 + storage/rocksdb/mysql-test/rocksdb/t/type_set.inc | 49 + 
storage/rocksdb/mysql-test/rocksdb/t/type_set.test | 8 + .../rocksdb/t/type_set_indexes-master.opt | 1 + .../mysql-test/rocksdb/t/type_set_indexes.test | 100 + storage/rocksdb/mysql-test/rocksdb/t/type_text.inc | 49 + .../rocksdb/mysql-test/rocksdb/t/type_text.test | 8 + .../rocksdb/t/type_text_indexes-master.opt | 1 + .../mysql-test/rocksdb/t/type_text_indexes.test | 171 + .../mysql-test/rocksdb/t/type_varbinary.inc | 75 + .../mysql-test/rocksdb/t/type_varbinary.test | 8 + .../mysql-test/rocksdb/t/type_varchar-master.opt | 1 + .../rocksdb/mysql-test/rocksdb/t/type_varchar.inc | 77 + .../rocksdb/mysql-test/rocksdb/t/type_varchar.test | 82 + .../mysql-test/rocksdb/t/type_varchar_endspace.inc | 85 + .../rocksdb/mysql-test/rocksdb/t/unique_check.test | 173 + .../rocksdb/mysql-test/rocksdb/t/unique_sec.inc | 198 + .../rocksdb/mysql-test/rocksdb/t/unique_sec.test | 51 + .../mysql-test/rocksdb/t/unique_sec_rev_cf.test | 4 + .../rocksdb/t/unsupported_tx_isolations.test | 25 + storage/rocksdb/mysql-test/rocksdb/t/update.test | 82 + .../mysql-test/rocksdb/t/update_ignore-master.opt | 1 + .../mysql-test/rocksdb/t/update_ignore.test | 35 + .../rocksdb/mysql-test/rocksdb/t/update_multi.test | 15 + .../mysql-test/rocksdb/t/update_multi_exec.inc | 27 + .../mysql-test/rocksdb/t/update_with_keys.test | 78 + .../t/use_direct_io_for_flush_and_compaction.test | 5 + .../mysql-test/rocksdb/t/use_direct_reads.test | 5 + .../rocksdb/t/use_direct_reads_writes.test | 62 + .../mysql-test/rocksdb/t/validate_datadic.test | 108 + .../mysql-test/rocksdb/t/varbinary_format.test | 131 + .../rocksdb/mysql-test/rocksdb/t/write_sync.test | 41 + storage/rocksdb/mysql-test/rocksdb/t/xa-master.opt | 1 + storage/rocksdb/mysql-test/rocksdb/t/xa.test | 73 + .../rocksdb/mysql-test/rocksdb_hotbackup/base.cnf | 25 + .../rocksdb_hotbackup/include/clean_tmpfiles.sh | 8 + .../rocksdb_hotbackup/include/cleanup.inc | 3 + .../include/create_slocket_socket.sh | 4 + .../rocksdb_hotbackup/include/create_table.sh | 18 
+ .../rocksdb_hotbackup/include/load_data.sh | 45 + .../rocksdb_hotbackup/include/load_data_and_run.sh | 11 + .../rocksdb_hotbackup/include/load_data_slocket.sh | 45 + .../include/remove_slocket_socket.sh | 4 + .../mysql-test/rocksdb_hotbackup/include/setup.inc | 16 + .../include/setup_replication_gtid.sh | 22 + .../include/setup_replication_gtid_and_sync.inc | 4 + .../rocksdb_hotbackup/include/setup_slocket.inc | 10 + .../rocksdb_hotbackup/include/stream_run.sh | 81 + .../rocksdb/mysql-test/rocksdb_hotbackup/my.cnf | 2 + .../mysql-test/rocksdb_hotbackup/r/gtid.result | 23 + .../mysql-test/rocksdb_hotbackup/r/slocket.result | 41 + .../mysql-test/rocksdb_hotbackup/r/stream.result | 20 + .../mysql-test/rocksdb_hotbackup/r/wdt.result | 20 + .../mysql-test/rocksdb_hotbackup/r/xbstream.result | 21 + .../rocksdb_hotbackup/r/xbstream_direct.result | 21 + .../rocksdb_hotbackup/r/xbstream_socket.result | 20 + .../mysql-test/rocksdb_hotbackup/t/gtid-master.opt | 1 + .../mysql-test/rocksdb_hotbackup/t/gtid-slave.opt | 1 + .../mysql-test/rocksdb_hotbackup/t/gtid.test | 47 + .../mysql-test/rocksdb_hotbackup/t/slocket.test | 46 + .../mysql-test/rocksdb_hotbackup/t/stream.test | 22 + .../mysql-test/rocksdb_hotbackup/t/wdt.test | 22 + .../mysql-test/rocksdb_hotbackup/t/xbstream.inc | 25 + .../mysql-test/rocksdb_hotbackup/t/xbstream.test | 7 + .../rocksdb_hotbackup/t/xbstream_direct-master.opt | 1 + .../rocksdb_hotbackup/t/xbstream_direct.test | 7 + .../rocksdb_hotbackup/t/xbstream_socket.test | 22 + .../rocksdb/mysql-test/rocksdb_rpl/combinations | 7 + .../rocksdb_rpl/include/have_rocksdb.inc | 10 + .../rocksdb_rpl/include/have_rocksdb.opt | 12 + .../rocksdb_rpl/include/rpl_gtid_crash_safe.inc | 37 + .../include/rpl_no_unique_check_on_lag.inc | 72 + storage/rocksdb/mysql-test/rocksdb_rpl/my.cnf | 17 + .../r/consistent_snapshot_mixed_engines.result | 68 + .../mysql-test/rocksdb_rpl/r/mdev12179.result | 283 + .../rocksdb_rpl/r/multiclient_2pc.result | 26 + 
.../r/optimize_myrocks_replace_into.result | 282 + ...sdb_slave_check_before_image_consistency.result | 165 + .../rocksdb_rpl/r/rpl_binlog_xid_count.result | 204 + .../r/rpl_crash_safe_wal_corrupt.result | 135 + .../rocksdb_rpl/r/rpl_ddl_high_priority.result | 39 + .../rocksdb_rpl/r/rpl_gtid_crash_safe.result | 361 + .../r/rpl_gtid_crash_safe_optimized.result | 361 + .../r/rpl_gtid_crash_safe_wal_corrupt.result | 140 + .../r/rpl_gtid_rocksdb_sys_header.result | 16 + .../r/rpl_missing_columns_sk_update.result | 62 + .../rpl_mts_dependency_unique_key_conflicts.result | 44 + .../r/rpl_no_unique_check_on_lag.result | 34 + .../r/rpl_no_unique_check_on_lag_mts.result | 31 + .../r/rpl_rocksdb_2pc_crash_recover.result | 44 + .../r/rpl_rocksdb_slave_gtid_info_optimized.result | 43 + .../rocksdb_rpl/r/rpl_rocksdb_snapshot.result | 222 + .../r/rpl_rocksdb_snapshot_without_gtid.result | 15 + .../rocksdb_rpl/r/rpl_rocksdb_stress_crash.result | 28 + .../r/rpl_skip_trx_api_binlog_format.result | 27 + .../rocksdb/mysql-test/rocksdb_rpl/r/rpl_xa.result | 61 + .../r/singledelete_idempotent_recovery.result | 25 + .../r/singledelete_idempotent_table.result | 29 + .../mysql-test/rocksdb_rpl/rpl_1slave_base.cnf | 51 + storage/rocksdb/mysql-test/rocksdb_rpl/suite.opt | 1 + storage/rocksdb/mysql-test/rocksdb_rpl/suite.pm | 25 + .../t/consistent_snapshot_mixed_engines-master.opt | 1 + .../t/consistent_snapshot_mixed_engines.test | 81 + .../rocksdb/mysql-test/rocksdb_rpl/t/disabled.def | 34 + .../mysql-test/rocksdb_rpl/t/mdev12179.test | 317 + .../rocksdb_rpl/t/multiclient_2pc-master.opt | 1 + .../mysql-test/rocksdb_rpl/t/multiclient_2pc.test | 77 + .../t/optimize_myrocks_replace_into.test | 149 + ..._slave_check_before_image_consistency-slave.opt | 1 + ...cksdb_slave_check_before_image_consistency.test | 22 + .../rocksdb_rpl/t/rpl_binlog_xid_count-master.opt | 3 + .../rocksdb_rpl/t/rpl_binlog_xid_count.test | 20 + .../rocksdb_rpl/t/rpl_check_for_binlog_info.pl | 19 + 
.../rocksdb_rpl/t/rpl_crash_safe_wal_corrupt.cnf | 13 + .../rocksdb_rpl/t/rpl_crash_safe_wal_corrupt.test | 12 + .../rocksdb_rpl/t/rpl_ddl_high_priority.test | 2 + .../rocksdb_rpl/t/rpl_gtid_crash_safe-master.opt | 1 + .../rocksdb_rpl/t/rpl_gtid_crash_safe-slave.opt | 2 + .../rocksdb_rpl/t/rpl_gtid_crash_safe.test | 11 + .../t/rpl_gtid_crash_safe_optimized-master.opt | 1 + .../t/rpl_gtid_crash_safe_optimized-slave.opt | 2 + .../t/rpl_gtid_crash_safe_optimized.test | 11 + .../t/rpl_gtid_crash_safe_wal_corrupt.cnf | 18 + .../t/rpl_gtid_crash_safe_wal_corrupt.inc | 154 + .../t/rpl_gtid_crash_safe_wal_corrupt.test | 12 + .../t/rpl_gtid_rocksdb_sys_header-master.opt | 1 + .../t/rpl_gtid_rocksdb_sys_header-slave.opt | 1 + .../rocksdb_rpl/t/rpl_gtid_rocksdb_sys_header.test | 40 + .../t/rpl_missing_columns_sk_update.cnf | 13 + .../t/rpl_missing_columns_sk_update.test | 69 + .../t/rpl_mts_dependency_unique_key_conflicts.test | 64 + .../t/rpl_no_unique_check_on_lag-slave.opt | 1 + .../rocksdb_rpl/t/rpl_no_unique_check_on_lag.test | 7 + .../t/rpl_no_unique_check_on_lag_mts-slave.opt | 1 + .../t/rpl_no_unique_check_on_lag_mts.test | 3 + .../t/rpl_rocksdb_2pc_crash_recover-master.opt | 1 + .../t/rpl_rocksdb_2pc_crash_recover-slave.opt | 1 + .../t/rpl_rocksdb_2pc_crash_recover.test | 57 + ...pl_rocksdb_slave_gtid_info_optimized-master.opt | 1 + ...rpl_rocksdb_slave_gtid_info_optimized-slave.opt | 1 + .../t/rpl_rocksdb_slave_gtid_info_optimized.test | 51 + .../rocksdb_rpl/t/rpl_rocksdb_snapshot-master.opt | 1 + .../rocksdb_rpl/t/rpl_rocksdb_snapshot-slave.opt | 1 + .../rocksdb_rpl/t/rpl_rocksdb_snapshot.test | 374 + .../t/rpl_rocksdb_snapshot_without_gtid.test | 18 + .../t/rpl_rocksdb_stress_crash-master.opt | 2 + .../t/rpl_rocksdb_stress_crash-slave.opt | 3 + .../rocksdb_rpl/t/rpl_rocksdb_stress_crash.test | 26 + .../t/rpl_skip_trx_api_binlog_format-master.opt | 2 + .../t/rpl_skip_trx_api_binlog_format-slave.opt | 2 + .../t/rpl_skip_trx_api_binlog_format.test | 52 + 
.../rocksdb/mysql-test/rocksdb_rpl/t/rpl_xa.inc | 84 + .../rocksdb/mysql-test/rocksdb_rpl/t/rpl_xa.test | 7 + .../t/singledelete_idempotent_recovery.cnf | 15 + .../t/singledelete_idempotent_recovery.test | 78 + .../t/singledelete_idempotent_table.cnf | 11 + .../t/singledelete_idempotent_table.test | 44 + .../rocksdb/mysql-test/rocksdb_stress/combinations | 5 + .../rocksdb_stress/include/have_rocksdb.inc | 10 + .../rocksdb_stress/include/have_rocksdb.opt | 12 + .../rocksdb_stress/include/rocksdb_stress.inc | 57 + storage/rocksdb/mysql-test/rocksdb_stress/my.cnf | 9 + .../rocksdb_stress/r/rocksdb_stress.result | 23 + .../rocksdb_stress/r/rocksdb_stress_crash.result | 23 + .../rocksdb/mysql-test/rocksdb_stress/suite.opt | 2 + storage/rocksdb/mysql-test/rocksdb_stress/suite.pm | 28 + .../mysql-test/rocksdb_stress/t/disabled.def | 2 + .../mysql-test/rocksdb_stress/t/load_generator.py | 1042 + .../rocksdb_stress/t/rocksdb_stress.test | 33 + .../rocksdb_stress/t/rocksdb_stress_crash.test | 34 + .../rocksdb_sys_vars/include/correctboolvalue.inc | 25 + .../rocksdb_sys_vars/include/have_rocksdb.inc | 10 + .../rocksdb_sys_vars/include/have_rocksdb.opt | 12 + .../rocksdb_sys_vars/include/rocksdb_sys_var.inc | 124 + storage/rocksdb/mysql-test/rocksdb_sys_vars/my.cnf | 10 + .../mysql-test/rocksdb_sys_vars/r/all_vars.result | 13 + ...db_access_hint_on_compaction_start_basic.result | 7 + .../r/rocksdb_advise_random_on_open_basic.result | 7 + ...db_allow_concurrent_memtable_write_basic.result | 7 + .../r/rocksdb_allow_mmap_reads_basic.result | 7 + .../r/rocksdb_allow_mmap_writes_basic.result | 7 + ...db_allow_to_start_after_corruption_basic.result | 7 + .../rocksdb_blind_delete_primary_key_basic.result | 100 + .../r/rocksdb_block_cache_size_basic.result | 85 + .../r/rocksdb_block_restart_interval_basic.result | 7 + .../r/rocksdb_block_size_basic.result | 7 + .../r/rocksdb_block_size_deviation_basic.result | 7 + .../r/rocksdb_bulk_load_allow_sk_basic.result | 100 + 
.../rocksdb_bulk_load_allow_unsorted_basic.result | 100 + .../r/rocksdb_bulk_load_basic.result | 100 + .../r/rocksdb_bulk_load_size_basic.result | 72 + .../r/rocksdb_bytes_per_sync_basic.result | 85 + .../r/rocksdb_cache_dump_basic.result | 19 + .../rocksdb_cache_high_pri_pool_ratio_basic.result | 22 + ...ksdb_cache_index_and_filter_blocks_basic.result | 7 + ...ndex_and_filter_with_high_priority_basic.result | 19 + .../r/rocksdb_checksums_pct_basic.result | 93 + .../r/rocksdb_collect_sst_properties_basic.result | 7 + .../r/rocksdb_commit_in_the_middle_basic.result | 100 + ...sdb_commit_time_batch_for_recovery_basic.result | 121 + .../r/rocksdb_compact_cf_basic.result | 40 + .../rocksdb_compaction_readahead_size_basic.result | 70 + ...ksdb_compaction_sequential_deletes_basic.result | 64 + ...action_sequential_deletes_count_sd_basic.result | 64 + ...ction_sequential_deletes_file_size_basic.result | 46 + ...mpaction_sequential_deletes_window_basic.result | 64 + .../r/rocksdb_create_checkpoint_basic.result | 15 + .../r/rocksdb_create_if_missing_basic.result | 14 + ...sdb_create_missing_column_families_basic.result | 14 + .../r/rocksdb_datadir_basic.result | 7 + .../r/rocksdb_db_write_buffer_size_basic.result | 7 + .../r/rocksdb_deadlock_detect_basic.result | 121 + .../r/rocksdb_deadlock_detect_depth_basic.result | 79 + ...ksdb_debug_manual_compaction_delay_basic.result | 46 + ...ebug_optimizer_no_zero_cardinality_basic.result | 64 + .../r/rocksdb_debug_ttl_ignore_pk_basic.result | 64 + .../rocksdb_debug_ttl_read_filter_ts_basic.result | 46 + .../r/rocksdb_debug_ttl_rec_ts_basic.result | 46 + .../r/rocksdb_debug_ttl_snapshot_ts_basic.result | 46 + .../r/rocksdb_default_cf_options_basic.result | 7 + .../r/rocksdb_delayed_write_rate_basic.result | 85 + .../r/rocksdb_delete_cf_basic.result | 6 + ...elete_obsolete_files_period_micros_basic.result | 7 + .../r/rocksdb_enable_2pc_basic.result | 75 + .../r/rocksdb_enable_bulk_load_api_basic.result | 14 + 
..._enable_insert_with_update_caching_basic.result | 75 + .../r/rocksdb_enable_thread_tracking_basic.result | 7 + .../r/rocksdb_enable_ttl_basic.result | 64 + .../rocksdb_enable_ttl_read_filtering_basic.result | 64 + ...enable_write_thread_adaptive_yield_basic.result | 7 + .../r/rocksdb_error_if_exists_basic.result | 14 + ...ksdb_error_on_suboptimal_collation_basic.result | 7 + .../r/rocksdb_flush_log_at_trx_commit_basic.result | 57 + ...cksdb_force_compute_memtable_stats_basic.result | 15 + ...e_compute_memtable_stats_cachetime_basic.result | 68 + ...force_flush_memtable_and_lzero_now_basic.result | 50 + .../rocksdb_force_flush_memtable_now_basic.result | 50 + ...cksdb_force_index_records_in_range_basic.result | 106 + .../r/rocksdb_git_hash_basic.result | 7 + ...rocksdb_hash_index_allow_collision_basic.result | 7 + .../r/rocksdb_ignore_unknown_options_basic.result | 14 + .../r/rocksdb_index_type_basic.result | 7 + .../r/rocksdb_info_log_level_basic.result | 93 + .../r/rocksdb_io_write_timeout_basic.result | 86 + .../r/rocksdb_is_fd_close_on_exec_basic.result | 7 + .../r/rocksdb_keep_log_file_num_basic.result | 7 + .../r/rocksdb_large_prefix_basic.result | 64 + .../r/rocksdb_lock_scanned_rows_basic.result | 170 + .../r/rocksdb_lock_wait_timeout_basic.result | 72 + .../r/rocksdb_log_file_time_to_roll_basic.result | 7 + ...ocksdb_manifest_preallocation_size_basic.result | 7 + .../rocksdb_manual_compaction_threads_basic.result | 93 + .../r/rocksdb_manual_wal_flush_basic.result | 14 + .../r/rocksdb_master_skip_tx_api_basic.result | 100 + .../r/rocksdb_max_background_jobs_basic.result | 46 + .../r/rocksdb_max_latest_deadlocks_basic.result | 53 + .../r/rocksdb_max_log_file_size_basic.result | 7 + .../r/rocksdb_max_manifest_file_size_basic.result | 7 + .../r/rocksdb_max_manual_compactions_basic.result | 57 + .../r/rocksdb_max_open_files_basic.result | 3 + .../r/rocksdb_max_row_locks_basic.result | 93 + .../r/rocksdb_max_subcompactions_basic.result | 7 + 
.../r/rocksdb_max_total_wal_size_basic.result | 7 + .../r/rocksdb_merge_buf_size_basic.result | 43 + .../r/rocksdb_merge_combine_read_size_basic.result | 29 + ...db_merge_tmp_file_removal_delay_ms_basic.result | 93 + ...table_reader_for_compaction_inputs_basic.result | 7 + .../r/rocksdb_no_block_cache_basic.result | 7 + .../r/rocksdb_override_cf_options_basic.result | 7 + .../r/rocksdb_paranoid_checks_basic.result | 7 + .../r/rocksdb_pause_background_work_basic.result | 75 + .../r/rocksdb_perf_context_level_basic.result | 114 + .../r/rocksdb_persistent_cache_path_basic.result | 13 + .../rocksdb_persistent_cache_size_mb_basic.result | 14 + ...0_filter_and_index_blocks_in_cache_basic.result | 7 + ...db_print_snapshot_conflict_queries_basic.result | 64 + ...rocksdb_rate_limiter_bytes_per_sec_basic.result | 101 + .../r/rocksdb_read_free_rpl_basic.result | 58 + .../r/rocksdb_read_free_rpl_tables_basic.result | 49 + .../r/rocksdb_records_in_range_basic.result | 100 + ...ksdb_remove_mariabackup_checkpoint_basic.result | 4 + .../r/rocksdb_reset_stats_basic.result | 97 + .../r/rocksdb_rollback_on_timeout_basic.result | 97 + ...ksdb_seconds_between_stat_computes_basic.result | 64 + .../rocksdb_signal_drop_index_thread_basic.result | 64 + .../r/rocksdb_sim_cache_size_basic.result | 7 + .../rocksdb_skip_bloom_filter_on_read_basic.result | 100 + .../r/rocksdb_skip_fill_cache_basic.result | 100 + .../rocksdb_skip_unique_check_tables_basic.result | 67 + ...rocksdb_sst_mgr_rate_bytes_per_sec_basic.result | 85 + .../r/rocksdb_stats_dump_period_sec_basic.result | 7 + .../r/rocksdb_stats_level_basic.result | 85 + .../r/rocksdb_stats_recalc_rate_basic.result | 53 + .../rocksdb_store_row_debug_checksums_basic.result | 100 + .../r/rocksdb_strict_collation_check_basic.result | 75 + ...ocksdb_strict_collation_exceptions_basic.result | 36 + ...ocksdb_supported_compression_types_basic.result | 4 + .../rocksdb_table_cache_numshardbits_basic.result | 7 + 
.../rocksdb_table_stats_sampling_pct_basic.result | 85 + .../rocksdb_sys_vars/r/rocksdb_tmpdir_basic.result | 29 + .../r/rocksdb_trace_sst_api_basic.result | 100 + .../r/rocksdb_two_write_queues_basic.result | 14 + .../r/rocksdb_unsafe_for_binlog_basic.result | 100 + .../r/rocksdb_update_cf_options.result | 38 + .../r/rocksdb_update_cf_options_basic.result | 126 + .../r/rocksdb_use_adaptive_mutex_basic.result | 7 + .../r/rocksdb_use_clock_cache_basic.result | 19 + ...direct_io_for_flush_and_compaction_basic.result | 7 + .../r/rocksdb_use_direct_reads_basic.result | 7 + .../r/rocksdb_use_fsync_basic.result | 7 + .../r/rocksdb_validate_tables_basic.result | 7 + ...rocksdb_verify_row_debug_checksums_basic.result | 100 + .../r/rocksdb_wal_bytes_per_sync_basic.result | 85 + .../r/rocksdb_wal_dir_basic.result | 7 + .../r/rocksdb_wal_recovery_mode_basic.result | 46 + .../r/rocksdb_wal_size_limit_mb_basic.result | 7 + .../r/rocksdb_wal_ttl_seconds_basic.result | 7 + .../r/rocksdb_whole_key_filtering_basic.result | 7 + .../r/rocksdb_write_batch_max_bytes_basic.result | 15 + .../r/rocksdb_write_disable_wal_basic.result | 114 + ...ite_ignore_missing_column_families_basic.result | 100 + .../r/rocksdb_write_policy_basic.result | 15 + .../rocksdb/mysql-test/rocksdb_sys_vars/suite.opt | 1 + .../rocksdb/mysql-test/rocksdb_sys_vars/suite.pm | 21 + .../mysql-test/rocksdb_sys_vars/t/all_vars.test | 39 + .../mysql-test/rocksdb_sys_vars/t/disabled.def | 5 + ...ksdb_access_hint_on_compaction_start_basic.test | 7 + .../t/rocksdb_advise_random_on_open_basic.test | 6 + ...ksdb_allow_concurrent_memtable_write_basic.test | 5 + .../t/rocksdb_allow_mmap_reads_basic.test | 6 + .../t/rocksdb_allow_mmap_writes_basic.test | 6 + ...ksdb_allow_to_start_after_corruption_basic.test | 6 + .../t/rocksdb_blind_delete_primary_key_basic.test | 18 + .../t/rocksdb_block_cache_size_basic.test | 21 + .../t/rocksdb_block_restart_interval_basic.test | 6 + .../t/rocksdb_block_size_basic.test | 7 + 
.../t/rocksdb_block_size_deviation_basic.test | 7 + .../t/rocksdb_bulk_load_allow_sk_basic.test | 18 + .../t/rocksdb_bulk_load_allow_unsorted_basic.test | 18 + .../t/rocksdb_bulk_load_basic.test | 18 + .../t/rocksdb_bulk_load_size_basic.test | 16 + .../t/rocksdb_bytes_per_sync_basic.test | 22 + .../t/rocksdb_cache_dump_basic.test | 21 + .../t/rocksdb_cache_high_pri_pool_ratio_basic.test | 24 + ...ocksdb_cache_index_and_filter_blocks_basic.test | 6 + ..._index_and_filter_with_high_priority_basic.test | 21 + .../t/rocksdb_checksums_pct_basic.test | 17 + .../t/rocksdb_collect_sst_properties_basic.test | 8 + .../t/rocksdb_commit_in_the_middle_basic.test | 18 + ...cksdb_commit_time_batch_for_recovery_basic.test | 20 + .../t/rocksdb_compact_cf_basic.test | 19 + .../t/rocksdb_compaction_readahead_size_basic.test | 23 + ...ocksdb_compaction_sequential_deletes_basic.test | 18 + ...mpaction_sequential_deletes_count_sd_basic.test | 18 + ...paction_sequential_deletes_file_size_basic.test | 16 + ...compaction_sequential_deletes_window_basic.test | 18 + .../t/rocksdb_create_checkpoint_basic.test | 29 + .../t/rocksdb_create_if_missing_basic.test | 16 + ...cksdb_create_missing_column_families_basic.test | 16 + .../rocksdb_sys_vars/t/rocksdb_datadir_basic.test | 6 + .../t/rocksdb_db_write_buffer_size_basic.test | 6 + .../t/rocksdb_deadlock_detect_basic.test | 20 + .../t/rocksdb_deadlock_detect_depth_basic.test | 17 + ...ocksdb_debug_manual_compaction_delay_basic.test | 16 + ..._debug_optimizer_no_zero_cardinality_basic.test | 18 + .../t/rocksdb_debug_ttl_ignore_pk_basic.test | 18 + .../t/rocksdb_debug_ttl_read_filter_ts_basic.test | 16 + .../t/rocksdb_debug_ttl_rec_ts_basic.test | 16 + .../t/rocksdb_debug_ttl_snapshot_ts_basic.test | 16 + .../t/rocksdb_default_cf_options_basic.test | 6 + .../t/rocksdb_delayed_write_rate_basic.test | 22 + .../t/rocksdb_delete_cf_basic-master.opt | 1 + .../t/rocksdb_delete_cf_basic.test | 75 + ..._delete_obsolete_files_period_micros_basic.test | 6 + 
.../t/rocksdb_enable_2pc_basic.test | 20 + .../t/rocksdb_enable_bulk_load_api_basic.test | 16 + ...db_enable_insert_with_update_caching_basic.test | 21 + .../t/rocksdb_enable_thread_tracking_basic.test | 6 + .../t/rocksdb_enable_ttl_basic.test | 18 + .../t/rocksdb_enable_ttl_read_filtering_basic.test | 18 + ...b_enable_write_thread_adaptive_yield_basic.test | 5 + .../t/rocksdb_error_if_exists_basic.test | 16 + ...ocksdb_error_on_suboptimal_collation_basic.test | 6 + .../t/rocksdb_flush_log_at_trx_commit_basic.test | 17 + ...rocksdb_force_compute_memtable_stats_basic.test | 23 + ...rce_compute_memtable_stats_cachetime_basic.test | 18 + ...b_force_flush_memtable_and_lzero_now_basic.test | 17 + .../t/rocksdb_force_flush_memtable_now_basic.test | 17 + ...rocksdb_force_index_records_in_range_basic.test | 23 + .../rocksdb_sys_vars/t/rocksdb_git_hash_basic.test | 6 + .../rocksdb_hash_index_allow_collision_basic.test | 7 + .../t/rocksdb_ignore_unknown_options_basic.test | 16 + .../t/rocksdb_index_type_basic.test | 7 + .../t/rocksdb_info_log_level_basic.test | 21 + .../t/rocksdb_io_write_timeout_basic.test | 20 + .../t/rocksdb_is_fd_close_on_exec_basic.test | 6 + .../t/rocksdb_keep_log_file_num_basic.test | 7 + .../t/rocksdb_large_prefix_basic.test | 18 + .../t/rocksdb_lock_scanned_rows_basic.test | 22 + .../t/rocksdb_lock_wait_timeout_basic.test | 16 + .../t/rocksdb_log_file_time_to_roll_basic.test | 6 + .../rocksdb_manifest_preallocation_size_basic.test | 6 + .../t/rocksdb_manual_compaction_threads_basic.test | 17 + .../t/rocksdb_manual_wal_flush_basic.test | 16 + .../t/rocksdb_master_skip_tx_api_basic.test | 18 + .../t/rocksdb_max_background_jobs_basic.test | 16 + .../t/rocksdb_max_latest_deadlocks_basic.test | 17 + .../t/rocksdb_max_log_file_size_basic.test | 6 + .../t/rocksdb_max_manifest_file_size_basic.test | 7 + .../t/rocksdb_max_manual_compactions_basic.test | 17 + .../t/rocksdb_max_open_files_basic.test | 8 + .../t/rocksdb_max_row_locks_basic.test | 17 + 
.../t/rocksdb_max_subcompactions_basic.test | 7 + .../t/rocksdb_max_total_wal_size_basic.test | 6 + .../t/rocksdb_merge_buf_size_basic.test | 50 + .../t/rocksdb_merge_combine_read_size_basic.test | 32 + ...ksdb_merge_tmp_file_removal_delay_ms_basic.test | 49 + ...w_table_reader_for_compaction_inputs_basic.test | 7 + .../t/rocksdb_no_block_cache_basic.test | 6 + .../t/rocksdb_override_cf_options_basic.test | 6 + .../t/rocksdb_paranoid_checks_basic.test | 7 + .../t/rocksdb_pause_background_work_basic.test | 20 + .../t/rocksdb_perf_context_level_basic.test | 18 + .../t/rocksdb_persistent_cache_path_basic.test | 16 + .../t/rocksdb_persistent_cache_size_mb_basic.test | 16 + ..._l0_filter_and_index_blocks_in_cache_basic.test | 6 + ...ksdb_print_snapshot_conflict_queries_basic.test | 18 + .../rocksdb_rate_limiter_bytes_per_sec_basic.test | 63 + .../t/rocksdb_read_free_rpl_basic.test | 19 + .../t/rocksdb_read_free_rpl_tables_basic.test | 20 + .../t/rocksdb_records_in_range_basic.test | 18 + ...ocksdb_remove_mariabackup_checkpoint_basic.test | 5 + .../t/rocksdb_reset_stats_basic.test | 21 + .../t/rocksdb_rollback_on_timeout_basic.test | 21 + ...ocksdb_seconds_between_stat_computes_basic.test | 18 + .../t/rocksdb_signal_drop_index_thread_basic.test | 19 + .../t/rocksdb_sim_cache_size_basic.test | 6 + .../t/rocksdb_skip_bloom_filter_on_read_basic.test | 18 + .../t/rocksdb_skip_fill_cache_basic.test | 18 + .../t/rocksdb_skip_unique_check_tables_basic.test | 18 + .../rocksdb_sst_mgr_rate_bytes_per_sec_basic.test | 22 + .../t/rocksdb_stats_dump_period_sec_basic.test | 6 + .../t/rocksdb_stats_level_basic.test | 21 + .../t/rocksdb_stats_recalc_rate_basic.test | 17 + .../t/rocksdb_store_row_debug_checksums_basic.test | 18 + .../t/rocksdb_strict_collation_check_basic.test | 19 + .../rocksdb_strict_collation_exceptions_basic.test | 35 + .../rocksdb_supported_compression_types_basic.test | 7 + .../t/rocksdb_table_cache_numshardbits_basic.test | 6 + 
.../t/rocksdb_table_stats_sampling_pct_basic.test | 22 + .../rocksdb_sys_vars/t/rocksdb_tmpdir_basic.test | 38 + .../t/rocksdb_trace_sst_api_basic.test | 18 + .../t/rocksdb_two_write_queues_basic.test | 16 + .../t/rocksdb_unsafe_for_binlog_basic.test | 18 + .../t/rocksdb_update_cf_options.test | 22 + .../t/rocksdb_update_cf_options_basic.test | 119 + .../t/rocksdb_use_adaptive_mutex_basic.test | 6 + .../t/rocksdb_use_clock_cache_basic.test | 21 + ...e_direct_io_for_flush_and_compaction_basic.test | 6 + .../t/rocksdb_use_direct_reads_basic.test | 6 + .../t/rocksdb_use_fsync_basic.test | 6 + .../t/rocksdb_validate_tables_basic.test | 6 + .../rocksdb_verify_row_debug_checksums_basic.test | 18 + .../t/rocksdb_wal_bytes_per_sync_basic.test | 22 + .../rocksdb_sys_vars/t/rocksdb_wal_dir_basic.test | 6 + .../t/rocksdb_wal_recovery_mode_basic.test | 17 + .../t/rocksdb_wal_size_limit_mb_basic.test | 6 + .../t/rocksdb_wal_ttl_seconds_basic.test | 6 + .../t/rocksdb_whole_key_filtering_basic.test | 6 + .../t/rocksdb_write_batch_max_bytes_basic.test | 26 + .../t/rocksdb_write_disable_wal_basic.test | 18 + ...write_ignore_missing_column_families_basic.test | 18 + .../t/rocksdb_write_policy_basic.test | 17 + .../mysql-test/storage_engine/cache_index.rdiff | 71 + .../storage_engine/checksum_table_live.rdiff | 13 + .../mysql-test/storage_engine/cleanup_engine.inc | 25 + .../mysql-test/storage_engine/define_engine.inc | 45 + .../rocksdb/mysql-test/storage_engine/disabled.def | 27 + .../rocksdb/mysql-test/storage_engine/index.rdiff | 60 + .../storage_engine/index_type_btree.rdiff | 60 + .../storage_engine/index_type_hash.rdiff | 60 + .../mysql-test/storage_engine/mask_engine.inc | 15 + .../rocksdb/mysql-test/storage_engine/misc.rdiff | 34 + .../storage_engine/parts/checksum_table.rdiff | 13 + .../storage_engine/parts/create_table.rdiff | 20 + .../mysql-test/storage_engine/parts/disabled.def | 3 + .../mysql-test/storage_engine/parts/suite.opt | 1 + 
.../mysql-test/storage_engine/show_engine.rdiff | 15 + .../storage_engine/show_table_status.rdiff | 20 + .../rocksdb/mysql-test/storage_engine/suite.opt | 1 + .../storage_engine/tbl_opt_insert_method.rdiff | 11 + .../mysql-test/storage_engine/tbl_opt_union.rdiff | 16 + .../mysql-test/storage_engine/tbl_temporary.rdiff | 24 + .../mysql-test/storage_engine/truncate_table.rdiff | 24 + .../mysql-test/storage_engine/trx/delete.rdiff | 10 + .../mysql-test/storage_engine/trx/disabled.def | 4 + .../mysql-test/storage_engine/trx/insert.rdiff | 24 + .../storage_engine/trx/level_read_committed.rdiff | 10 + .../storage_engine/trx/level_repeatable_read.rdiff | 35 + .../mysql-test/storage_engine/trx/suite.opt | 1 + .../mysql-test/storage_engine/trx/update.rdiff | 38 + .../storage_engine/type_binary_indexes.rdiff | 11 + .../storage_engine/type_bit_indexes.rdiff | 20 + .../storage_engine/type_enum_indexes.rdiff | 11 + .../storage_engine/type_set_indexes.rdiff | 20 + storage/rocksdb/nosql_access.cc | 53 + storage/rocksdb/nosql_access.h | 36 + storage/rocksdb/properties_collector.cc | 546 + storage/rocksdb/properties_collector.h | 215 + storage/rocksdb/rdb_buff.h | 549 + storage/rocksdb/rdb_cf_manager.cc | 273 + storage/rocksdb/rdb_cf_manager.h | 108 + storage/rocksdb/rdb_cf_options.cc | 341 + storage/rocksdb/rdb_cf_options.h | 104 + storage/rocksdb/rdb_compact_filter.h | 220 + storage/rocksdb/rdb_comparator.h | 85 + storage/rocksdb/rdb_converter.cc | 838 + storage/rocksdb/rdb_converter.h | 247 + storage/rocksdb/rdb_datadic.cc | 5411 +++++ storage/rocksdb/rdb_datadic.h | 1637 ++ storage/rocksdb/rdb_global.h | 392 + storage/rocksdb/rdb_i_s.cc | 1997 ++ storage/rocksdb/rdb_i_s.h | 37 + storage/rocksdb/rdb_index_merge.cc | 630 + storage/rocksdb/rdb_index_merge.h | 227 + storage/rocksdb/rdb_io_watchdog.cc | 240 + storage/rocksdb/rdb_io_watchdog.h | 119 + storage/rocksdb/rdb_mariadb_port.h | 55 + storage/rocksdb/rdb_mariadb_server_port.cc | 122 + storage/rocksdb/rdb_mariadb_server_port.h 
| 76 + storage/rocksdb/rdb_mutex_wrapper.cc | 214 + storage/rocksdb/rdb_mutex_wrapper.h | 143 + storage/rocksdb/rdb_perf_context.cc | 285 + storage/rocksdb/rdb_perf_context.h | 168 + storage/rocksdb/rdb_psi.cc | 115 + storage/rocksdb/rdb_psi.h | 58 + storage/rocksdb/rdb_source_revision.h | 1 + storage/rocksdb/rdb_source_revision.h.in | 1 + storage/rocksdb/rdb_sst_info.cc | 562 + storage/rocksdb/rdb_sst_info.h | 265 + storage/rocksdb/rdb_threads.cc | 83 + storage/rocksdb/rdb_threads.h | 195 + storage/rocksdb/rdb_utils.cc | 369 + storage/rocksdb/rdb_utils.h | 335 + storage/rocksdb/rocksdb-range-access.txt | 292 + storage/rocksdb/rocksdb/.clang-format | 5 + storage/rocksdb/rocksdb/.gitignore | 84 + storage/rocksdb/rocksdb/.lgtm.yml | 4 + storage/rocksdb/rocksdb/.travis.yml | 135 + storage/rocksdb/rocksdb/.watchmanconfig | 6 + storage/rocksdb/rocksdb/AUTHORS | 12 + storage/rocksdb/rocksdb/CMakeLists.txt | 1178 + storage/rocksdb/rocksdb/CODE_OF_CONDUCT.md | 77 + storage/rocksdb/rocksdb/CONTRIBUTING.md | 17 + storage/rocksdb/rocksdb/COPYING | 339 + storage/rocksdb/rocksdb/DEFAULT_OPTIONS_HISTORY.md | 24 + storage/rocksdb/rocksdb/DUMP_FORMAT.md | 16 + storage/rocksdb/rocksdb/HISTORY.md | 1137 + storage/rocksdb/rocksdb/INSTALL.md | 202 + storage/rocksdb/rocksdb/LANGUAGE-BINDINGS.md | 22 + storage/rocksdb/rocksdb/LICENSE.Apache | 202 + storage/rocksdb/rocksdb/LICENSE.leveldb | 29 + storage/rocksdb/rocksdb/Makefile | 2185 ++ storage/rocksdb/rocksdb/README.md | 31 + storage/rocksdb/rocksdb/ROCKSDB_LITE.md | 21 + storage/rocksdb/rocksdb/TARGETS | 1530 ++ storage/rocksdb/rocksdb/USERS.md | 108 + storage/rocksdb/rocksdb/Vagrantfile | 39 + storage/rocksdb/rocksdb/WINDOWS_PORT.md | 228 + storage/rocksdb/rocksdb/appveyor.yml | 75 + .../rocksdb/rocksdb/buckifier/buckify_rocksdb.py | 236 + .../rocksdb/rocksdb/buckifier/rocks_test_runner.sh | 6 + .../rocksdb/rocksdb/buckifier/targets_builder.py | 80 + storage/rocksdb/rocksdb/buckifier/targets_cfg.py | 181 + 
storage/rocksdb/rocksdb/buckifier/util.py | 119 + storage/rocksdb/rocksdb/build_tools/amalgamate.py | 111 + .../rocksdb/build_tools/build_detect_platform | 730 + .../rocksdb/rocksdb/build_tools/dependencies.sh | 19 + .../rocksdb/build_tools/dependencies_4.8.1.sh | 20 + .../build_tools/dependencies_platform007.sh | 20 + storage/rocksdb/rocksdb/build_tools/dockerbuild.sh | 3 + .../rocksdb/rocksdb/build_tools/error_filter.py | 177 + .../rocksdb/build_tools/fb_compile_mongo.sh | 55 + .../rocksdb/rocksdb/build_tools/fbcode_config.sh | 165 + .../rocksdb/build_tools/fbcode_config4.8.1.sh | 118 + .../build_tools/fbcode_config_platform007.sh | 170 + storage/rocksdb/rocksdb/build_tools/format-diff.sh | 138 + storage/rocksdb/rocksdb/build_tools/gnu_parallel | 7936 +++++++ .../rocksdb/rocksdb/build_tools/make_package.sh | 134 + .../rocksdb/build_tools/precommit_checker.py | 209 + .../rocksdb/build_tools/regression_build_test.sh | 414 + .../rocksdb/build_tools/rocksdb-lego-determinator | 1068 + .../rocksdb/rocksdb/build_tools/run_ci_db_test.ps1 | 487 + .../rocksdb/rocksdb/build_tools/setup_centos7.sh | 44 + .../rocksdb/build_tools/update_dependencies.sh | 182 + storage/rocksdb/rocksdb/build_tools/version.sh | 23 + storage/rocksdb/rocksdb/cache/cache_bench.cc | 281 + storage/rocksdb/rocksdb/cache/cache_test.cc | 773 + storage/rocksdb/rocksdb/cache/clock_cache.cc | 761 + storage/rocksdb/rocksdb/cache/clock_cache.h | 16 + storage/rocksdb/rocksdb/cache/lru_cache.cc | 574 + storage/rocksdb/rocksdb/cache/lru_cache.h | 339 + storage/rocksdb/rocksdb/cache/lru_cache_test.cc | 198 + storage/rocksdb/rocksdb/cache/sharded_cache.cc | 162 + storage/rocksdb/rocksdb/cache/sharded_cache.h | 111 + .../rocksdb/rocksdb/cmake/RocksDBConfig.cmake.in | 3 + .../rocksdb/cmake/modules/FindJeMalloc.cmake | 29 + .../rocksdb/rocksdb/cmake/modules/FindNUMA.cmake | 29 + .../rocksdb/rocksdb/cmake/modules/FindTBB.cmake | 33 + .../rocksdb/rocksdb/cmake/modules/Findgflags.cmake | 29 + 
.../rocksdb/rocksdb/cmake/modules/Findlz4.cmake | 29 + .../rocksdb/rocksdb/cmake/modules/Findsnappy.cmake | 29 + .../rocksdb/rocksdb/cmake/modules/Findzstd.cmake | 29 + .../rocksdb/cmake/modules/ReadVersion.cmake | 10 + storage/rocksdb/rocksdb/coverage/coverage_test.sh | 79 + .../rocksdb/rocksdb/coverage/parse_gcov_output.py | 119 + .../rocksdb/rocksdb/db/arena_wrapped_db_iter.cc | 106 + storage/rocksdb/rocksdb/db/arena_wrapped_db_iter.h | 112 + storage/rocksdb/rocksdb/db/blob_index.h | 179 + storage/rocksdb/rocksdb/db/builder.cc | 263 + storage/rocksdb/rocksdb/db/builder.h | 88 + storage/rocksdb/rocksdb/db/c.cc | 4451 ++++ storage/rocksdb/rocksdb/db/c_test.c | 1866 ++ storage/rocksdb/rocksdb/db/column_family.cc | 1523 ++ storage/rocksdb/rocksdb/db/column_family.h | 757 + storage/rocksdb/rocksdb/db/column_family_test.cc | 3387 +++ storage/rocksdb/rocksdb/db/compact_files_test.cc | 421 + storage/rocksdb/rocksdb/db/compacted_db_impl.cc | 160 + storage/rocksdb/rocksdb/db/compacted_db_impl.h | 113 + .../rocksdb/rocksdb/db/compaction/compaction.cc | 564 + storage/rocksdb/rocksdb/db/compaction/compaction.h | 384 + .../db/compaction/compaction_iteration_stats.h | 37 + .../rocksdb/db/compaction/compaction_iterator.cc | 774 + .../rocksdb/db/compaction/compaction_iterator.h | 240 + .../db/compaction/compaction_iterator_test.cc | 976 + .../rocksdb/db/compaction/compaction_job.cc | 1700 ++ .../rocksdb/rocksdb/db/compaction/compaction_job.h | 198 + .../db/compaction/compaction_job_stats_test.cc | 1043 + .../rocksdb/db/compaction/compaction_job_test.cc | 1082 + .../rocksdb/db/compaction/compaction_picker.cc | 1131 + .../rocksdb/db/compaction/compaction_picker.h | 313 + .../db/compaction/compaction_picker_fifo.cc | 242 + .../rocksdb/db/compaction/compaction_picker_fifo.h | 53 + .../db/compaction/compaction_picker_level.cc | 558 + .../db/compaction/compaction_picker_level.h | 32 + .../db/compaction/compaction_picker_test.cc | 1741 ++ 
.../db/compaction/compaction_picker_universal.cc | 1105 + .../db/compaction/compaction_picker_universal.h | 31 + storage/rocksdb/rocksdb/db/comparator_db_test.cc | 660 + storage/rocksdb/rocksdb/db/convenience.cc | 77 + storage/rocksdb/rocksdb/db/corruption_test.cc | 613 + storage/rocksdb/rocksdb/db/cuckoo_table_db_test.cc | 351 + storage/rocksdb/rocksdb/db/db_basic_test.cc | 2545 +++ storage/rocksdb/rocksdb/db/db_blob_index_test.cc | 436 + storage/rocksdb/rocksdb/db/db_block_cache_test.cc | 761 + storage/rocksdb/rocksdb/db/db_bloom_filter_test.cc | 1910 ++ .../rocksdb/db/db_compaction_filter_test.cc | 872 + storage/rocksdb/rocksdb/db/db_compaction_test.cc | 5167 +++++ .../rocksdb/rocksdb/db/db_dynamic_level_test.cc | 505 + storage/rocksdb/rocksdb/db/db_encryption_test.cc | 122 + storage/rocksdb/rocksdb/db/db_filesnapshot.cc | 177 + storage/rocksdb/rocksdb/db/db_flush_test.cc | 784 + storage/rocksdb/rocksdb/db/db_impl/db_impl.cc | 4550 ++++ storage/rocksdb/rocksdb/db/db_impl/db_impl.h | 2107 ++ .../rocksdb/db/db_impl/db_impl_compaction_flush.cc | 3116 +++ .../rocksdb/rocksdb/db/db_impl/db_impl_debug.cc | 294 + .../rocksdb/db/db_impl/db_impl_experimental.cc | 151 + .../rocksdb/rocksdb/db/db_impl/db_impl_files.cc | 667 + storage/rocksdb/rocksdb/db/db_impl/db_impl_open.cc | 1651 ++ .../rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc | 221 + .../rocksdb/rocksdb/db/db_impl/db_impl_readonly.h | 137 + .../rocksdb/db/db_impl/db_impl_secondary.cc | 671 + .../rocksdb/rocksdb/db/db_impl/db_impl_secondary.h | 333 + .../rocksdb/rocksdb/db/db_impl/db_impl_write.cc | 1839 ++ .../rocksdb/db/db_impl/db_secondary_test.cc | 869 + storage/rocksdb/rocksdb/db/db_info_dumper.cc | 123 + storage/rocksdb/rocksdb/db/db_info_dumper.h | 14 + .../rocksdb/rocksdb/db/db_inplace_update_test.cc | 177 + storage/rocksdb/rocksdb/db/db_io_failure_test.cc | 568 + storage/rocksdb/rocksdb/db/db_iter.cc | 1310 ++ storage/rocksdb/rocksdb/db/db_iter.h | 344 + storage/rocksdb/rocksdb/db/db_iter_stress_test.cc | 
654 + storage/rocksdb/rocksdb/db/db_iter_test.cc | 3175 +++ storage/rocksdb/rocksdb/db/db_iterator_test.cc | 2998 +++ storage/rocksdb/rocksdb/db/db_log_iter_test.cc | 294 + storage/rocksdb/rocksdb/db/db_memtable_test.cc | 340 + .../rocksdb/rocksdb/db/db_merge_operand_test.cc | 240 + .../rocksdb/rocksdb/db/db_merge_operator_test.cc | 666 + storage/rocksdb/rocksdb/db/db_options_test.cc | 870 + storage/rocksdb/rocksdb/db/db_properties_test.cc | 1711 ++ storage/rocksdb/rocksdb/db/db_range_del_test.cc | 1660 ++ storage/rocksdb/rocksdb/db/db_sst_test.cc | 1227 + storage/rocksdb/rocksdb/db/db_statistics_test.cc | 149 + .../rocksdb/rocksdb/db/db_table_properties_test.cc | 336 + storage/rocksdb/rocksdb/db/db_tailing_iter_test.cc | 547 + storage/rocksdb/rocksdb/db/db_test.cc | 6605 ++++++ storage/rocksdb/rocksdb/db/db_test2.cc | 4695 ++++ storage/rocksdb/rocksdb/db/db_test_util.cc | 1564 ++ storage/rocksdb/rocksdb/db/db_test_util.h | 1000 + .../rocksdb/db/db_universal_compaction_test.cc | 2254 ++ storage/rocksdb/rocksdb/db/db_wal_test.cc | 1586 ++ storage/rocksdb/rocksdb/db/db_write_test.cc | 329 + storage/rocksdb/rocksdb/db/dbformat.cc | 197 + storage/rocksdb/rocksdb/db/dbformat.h | 671 + storage/rocksdb/rocksdb/db/dbformat_test.cc | 207 + storage/rocksdb/rocksdb/db/deletefile_test.cc | 571 + storage/rocksdb/rocksdb/db/error_handler.cc | 344 + storage/rocksdb/rocksdb/db/error_handler.h | 75 + storage/rocksdb/rocksdb/db/error_handler_test.cc | 871 + storage/rocksdb/rocksdb/db/event_helpers.cc | 223 + storage/rocksdb/rocksdb/db/event_helpers.h | 55 + storage/rocksdb/rocksdb/db/experimental.cc | 50 + .../rocksdb/db/external_sst_file_basic_test.cc | 1128 + .../rocksdb/db/external_sst_file_ingestion_job.cc | 731 + .../rocksdb/db/external_sst_file_ingestion_job.h | 180 + .../rocksdb/rocksdb/db/external_sst_file_test.cc | 2832 +++ storage/rocksdb/rocksdb/db/fault_injection_test.cc | 555 + storage/rocksdb/rocksdb/db/file_indexer.cc | 216 + storage/rocksdb/rocksdb/db/file_indexer.h 
| 142 + storage/rocksdb/rocksdb/db/file_indexer_test.cc | 350 + storage/rocksdb/rocksdb/db/filename_test.cc | 180 + storage/rocksdb/rocksdb/db/flush_job.cc | 462 + storage/rocksdb/rocksdb/db/flush_job.h | 158 + storage/rocksdb/rocksdb/db/flush_job_test.cc | 498 + storage/rocksdb/rocksdb/db/flush_scheduler.cc | 86 + storage/rocksdb/rocksdb/db/flush_scheduler.h | 54 + storage/rocksdb/rocksdb/db/forward_iterator.cc | 975 + storage/rocksdb/rocksdb/db/forward_iterator.h | 160 + .../rocksdb/rocksdb/db/forward_iterator_bench.cc | 377 + .../rocksdb/rocksdb/db/import_column_family_job.cc | 276 + .../rocksdb/rocksdb/db/import_column_family_job.h | 72 + .../rocksdb/db/import_column_family_test.cc | 567 + storage/rocksdb/rocksdb/db/internal_stats.cc | 1424 ++ storage/rocksdb/rocksdb/db/internal_stats.h | 697 + storage/rocksdb/rocksdb/db/job_context.h | 219 + storage/rocksdb/rocksdb/db/listener_test.cc | 1042 + storage/rocksdb/rocksdb/db/log_format.h | 48 + storage/rocksdb/rocksdb/db/log_reader.cc | 624 + storage/rocksdb/rocksdb/db/log_reader.h | 189 + storage/rocksdb/rocksdb/db/log_test.cc | 928 + storage/rocksdb/rocksdb/db/log_writer.cc | 162 + storage/rocksdb/rocksdb/db/log_writer.h | 114 + .../rocksdb/rocksdb/db/logs_with_prep_tracker.cc | 67 + .../rocksdb/rocksdb/db/logs_with_prep_tracker.h | 63 + storage/rocksdb/rocksdb/db/lookup_key.h | 66 + storage/rocksdb/rocksdb/db/malloc_stats.cc | 54 + storage/rocksdb/rocksdb/db/malloc_stats.h | 24 + .../rocksdb/rocksdb/db/manual_compaction_test.cc | 160 + storage/rocksdb/rocksdb/db/memtable.cc | 1122 + storage/rocksdb/rocksdb/db/memtable.h | 542 + storage/rocksdb/rocksdb/db/memtable_list.cc | 771 + storage/rocksdb/rocksdb/db/memtable_list.h | 422 + storage/rocksdb/rocksdb/db/memtable_list_test.cc | 922 + storage/rocksdb/rocksdb/db/merge_context.h | 134 + storage/rocksdb/rocksdb/db/merge_helper.cc | 417 + storage/rocksdb/rocksdb/db/merge_helper.h | 194 + storage/rocksdb/rocksdb/db/merge_helper_test.cc | 290 + 
storage/rocksdb/rocksdb/db/merge_operator.cc | 86 + storage/rocksdb/rocksdb/db/merge_test.cc | 504 + storage/rocksdb/rocksdb/db/obsolete_files_test.cc | 222 + storage/rocksdb/rocksdb/db/options_file_test.cc | 119 + storage/rocksdb/rocksdb/db/perf_context_test.cc | 981 + .../rocksdb/rocksdb/db/pinned_iterators_manager.h | 87 + storage/rocksdb/rocksdb/db/plain_table_db_test.cc | 1375 ++ storage/rocksdb/rocksdb/db/pre_release_callback.h | 38 + storage/rocksdb/rocksdb/db/prefix_test.cc | 895 + storage/rocksdb/rocksdb/db/range_del_aggregator.cc | 484 + storage/rocksdb/rocksdb/db/range_del_aggregator.h | 441 + .../rocksdb/db/range_del_aggregator_bench.cc | 260 + .../rocksdb/db/range_del_aggregator_test.cc | 709 + .../rocksdb/db/range_tombstone_fragmenter.cc | 439 + .../rocksdb/db/range_tombstone_fragmenter.h | 256 + .../rocksdb/db/range_tombstone_fragmenter_test.cc | 552 + storage/rocksdb/rocksdb/db/read_callback.h | 53 + storage/rocksdb/rocksdb/db/repair.cc | 691 + storage/rocksdb/rocksdb/db/repair_test.cc | 369 + storage/rocksdb/rocksdb/db/snapshot_checker.h | 61 + storage/rocksdb/rocksdb/db/snapshot_impl.cc | 26 + storage/rocksdb/rocksdb/db/snapshot_impl.h | 167 + storage/rocksdb/rocksdb/db/table_cache.cc | 668 + storage/rocksdb/rocksdb/db/table_cache.h | 226 + .../rocksdb/db/table_properties_collector.cc | 74 + .../rocksdb/db/table_properties_collector.h | 107 + .../rocksdb/db/table_properties_collector_test.cc | 515 + storage/rocksdb/rocksdb/db/transaction_log_impl.cc | 315 + storage/rocksdb/rocksdb/db/transaction_log_impl.h | 127 + .../rocksdb/rocksdb/db/trim_history_scheduler.cc | 54 + .../rocksdb/rocksdb/db/trim_history_scheduler.h | 44 + storage/rocksdb/rocksdb/db/version_builder.cc | 545 + storage/rocksdb/rocksdb/db/version_builder.h | 48 + storage/rocksdb/rocksdb/db/version_builder_test.cc | 349 + storage/rocksdb/rocksdb/db/version_edit.cc | 802 + storage/rocksdb/rocksdb/db/version_edit.h | 438 + storage/rocksdb/rocksdb/db/version_edit_test.cc | 286 + 
storage/rocksdb/rocksdb/db/version_set.cc | 6005 +++++ storage/rocksdb/rocksdb/db/version_set.h | 1251 ++ storage/rocksdb/rocksdb/db/version_set_test.cc | 1287 ++ storage/rocksdb/rocksdb/db/wal_manager.cc | 510 + storage/rocksdb/rocksdb/db/wal_manager.h | 114 + storage/rocksdb/rocksdb/db/wal_manager_test.cc | 338 + storage/rocksdb/rocksdb/db/write_batch.cc | 2092 ++ storage/rocksdb/rocksdb/db/write_batch_base.cc | 94 + storage/rocksdb/rocksdb/db/write_batch_internal.h | 250 + storage/rocksdb/rocksdb/db/write_batch_test.cc | 888 + storage/rocksdb/rocksdb/db/write_callback.h | 27 + storage/rocksdb/rocksdb/db/write_callback_test.cc | 452 + storage/rocksdb/rocksdb/db/write_controller.cc | 128 + storage/rocksdb/rocksdb/db/write_controller.h | 144 + .../rocksdb/rocksdb/db/write_controller_test.cc | 135 + storage/rocksdb/rocksdb/db/write_thread.cc | 777 + storage/rocksdb/rocksdb/db/write_thread.h | 431 + .../rocksdb/rocksdb/db_stress_tool/CMakeLists.txt | 14 + .../rocksdb/db_stress_tool/batched_ops_stress.cc | 341 + .../db_stress_tool/cf_consistency_stress.cc | 583 + .../rocksdb/rocksdb/db_stress_tool/db_stress.cc | 23 + .../rocksdb/db_stress_tool/db_stress_common.cc | 225 + .../rocksdb/db_stress_tool/db_stress_common.h | 511 + .../rocksdb/db_stress_tool/db_stress_driver.cc | 163 + .../rocksdb/db_stress_tool/db_stress_driver.h | 17 + .../rocksdb/db_stress_tool/db_stress_env_wrapper.h | 36 + .../rocksdb/db_stress_tool/db_stress_gflags.cc | 655 + .../rocksdb/db_stress_tool/db_stress_listener.h | 222 + .../db_stress_tool/db_stress_shared_state.cc | 18 + .../db_stress_tool/db_stress_shared_state.h | 390 + .../rocksdb/db_stress_tool/db_stress_stat.h | 213 + .../rocksdb/db_stress_tool/db_stress_test_base.cc | 2133 ++ .../rocksdb/db_stress_tool/db_stress_test_base.h | 231 + .../rocksdb/db_stress_tool/db_stress_tool.cc | 252 + .../db_stress_tool/no_batched_ops_stress.cc | 631 + storage/rocksdb/rocksdb/defs.bzl | 42 + storage/rocksdb/rocksdb/docs/.gitignore | 8 + 
storage/rocksdb/rocksdb/docs/CNAME | 1 + storage/rocksdb/rocksdb/docs/CONTRIBUTING.md | 115 + storage/rocksdb/rocksdb/docs/Gemfile | 2 + storage/rocksdb/rocksdb/docs/Gemfile.lock | 146 + storage/rocksdb/rocksdb/docs/LICENSE-DOCUMENTATION | 385 + storage/rocksdb/rocksdb/docs/README.md | 80 + .../rocksdb/rocksdb/docs/TEMPLATE-INFORMATION.md | 17 + storage/rocksdb/rocksdb/docs/_config.yml | 85 + storage/rocksdb/rocksdb/docs/_data/authors.yml | 70 + storage/rocksdb/rocksdb/docs/_data/features.yml | 19 + storage/rocksdb/rocksdb/docs/_data/nav.yml | 30 + storage/rocksdb/rocksdb/docs/_data/nav_docs.yml | 3 + storage/rocksdb/rocksdb/docs/_data/powered_by.yml | 1 + .../rocksdb/docs/_data/powered_by_highlight.yml | 1 + storage/rocksdb/rocksdb/docs/_data/promo.yml | 6 + storage/rocksdb/rocksdb/docs/_docs/faq.md | 48 + .../rocksdb/rocksdb/docs/_docs/getting-started.md | 78 + .../rocksdb/docs/_includes/blog_pagination.html | 28 + .../rocksdb/docs/_includes/content/gridblocks.html | 5 + .../docs/_includes/content/items/gridblock.html | 37 + storage/rocksdb/rocksdb/docs/_includes/doc.html | 25 + .../rocksdb/rocksdb/docs/_includes/doc_paging.html | 0 storage/rocksdb/rocksdb/docs/_includes/footer.html | 33 + storage/rocksdb/rocksdb/docs/_includes/head.html | 23 + storage/rocksdb/rocksdb/docs/_includes/header.html | 19 + storage/rocksdb/rocksdb/docs/_includes/hero.html | 0 .../rocksdb/docs/_includes/home_header.html | 22 + .../rocksdb/docs/_includes/katex_import.html | 3 + .../rocksdb/docs/_includes/katex_render.html | 210 + storage/rocksdb/rocksdb/docs/_includes/nav.html | 37 + .../rocksdb/docs/_includes/nav/collection_nav.html | 64 + .../docs/_includes/nav/collection_nav_group.html | 19 + .../_includes/nav/collection_nav_group_item.html | 1 + .../rocksdb/docs/_includes/nav/header_nav.html | 30 + .../rocksdb/rocksdb/docs/_includes/nav_search.html | 15 + .../rocksdb/docs/_includes/plugins/all_share.html | 3 + .../docs/_includes/plugins/ascii_cinema.html | 2 + 
.../rocksdb/docs/_includes/plugins/button.html | 6 + .../docs/_includes/plugins/github_star.html | 4 + .../docs/_includes/plugins/github_watch.html | 4 + .../docs/_includes/plugins/google_share.html | 5 + .../rocksdb/docs/_includes/plugins/iframe.html | 6 + .../docs/_includes/plugins/like_button.html | 18 + .../rocksdb/docs/_includes/plugins/plugin_row.html | 5 + .../_includes/plugins/post_social_plugins.html | 41 + .../rocksdb/docs/_includes/plugins/slideshow.html | 88 + .../docs/_includes/plugins/twitter_follow.html | 12 + .../docs/_includes/plugins/twitter_share.html | 11 + storage/rocksdb/rocksdb/docs/_includes/post.html | 40 + .../rocksdb/rocksdb/docs/_includes/powered_by.html | 28 + .../rocksdb/docs/_includes/social_plugins.html | 31 + .../rocksdb/rocksdb/docs/_includes/ui/button.html | 1 + storage/rocksdb/rocksdb/docs/_layouts/basic.html | 12 + storage/rocksdb/rocksdb/docs/_layouts/blog.html | 11 + .../rocksdb/docs/_layouts/blog_default.html | 14 + storage/rocksdb/rocksdb/docs/_layouts/default.html | 12 + .../rocksdb/rocksdb/docs/_layouts/doc_default.html | 14 + .../rocksdb/rocksdb/docs/_layouts/doc_page.html | 10 + storage/rocksdb/rocksdb/docs/_layouts/docs.html | 5 + storage/rocksdb/rocksdb/docs/_layouts/home.html | 17 + storage/rocksdb/rocksdb/docs/_layouts/page.html | 3 + storage/rocksdb/rocksdb/docs/_layouts/plain.html | 10 + storage/rocksdb/rocksdb/docs/_layouts/post.html | 8 + .../rocksdb/rocksdb/docs/_layouts/redirect.html | 6 + .../rocksdb/rocksdb/docs/_layouts/top-level.html | 10 + .../2014-03-27-how-to-backup-rocksdb.markdown | 135 + ...-to-persist-in-memory-rocksdb-database.markdown | 54 + ...sdb-local-meetup-held-on-march-27-2014.markdown | 53 + .../_posts/2014-04-07-rocksdb-2-8-release.markdown | 40 + ...st-files-for-better-lookup-performance.markdown | 28 + .../rocksdb/docs/_posts/2014-05-14-lock.markdown | 88 + .../_posts/2014-05-19-rocksdb-3-0-release.markdown | 24 + .../_posts/2014-05-22-rocksdb-3-1-release.markdown | 20 + 
...014-06-23-plaintable-a-new-file-format.markdown | 47 + ...014-06-27-avoid-expensive-locks-in-get.markdown | 89 + .../_posts/2014-06-27-rocksdb-3-2-release.markdown | 30 + .../_posts/2014-07-29-rocksdb-3-3-release.markdown | 34 + .../rocksdb/docs/_posts/2014-09-12-cuckoo.markdown | 74 + .../2014-09-12-new-bloom-filter-format.markdown | 52 + .../_posts/2014-09-15-rocksdb-3-5-release.markdown | 38 + ...16-migrating-from-leveldb-to-rocksdb-2.markdown | 112 + ...24-reading-rocksdb-options-from-a-file.markdown | 41 + .../2015-02-27-write-batch-with-index.markdown | 20 + ...-22-integrating-rocksdb-with-mongodb-2.markdown | 16 + .../_posts/2015-06-12-rocksdb-in-osquery.markdown | 10 + .../2015-07-15-rocksdb-2015-h2-roadmap.markdown | 92 + ...2015-07-17-spatial-indexing-in-rocksdb.markdown | 78 + ...b-is-now-available-in-windows-platform.markdown | 30 + .../docs/_posts/2015-07-23-dynamic-level.markdown | 29 + .../docs/_posts/2015-10-27-getthreadlist.markdown | 193 + ...se-checkpoints-for-efficient-snapshots.markdown | 45 + ...16-analysis-file-read-latency-by-level.markdown | 244 + .../docs/_posts/2016-01-29-compaction_pri.markdown | 51 + .../_posts/2016-02-24-rocksdb-4-2-release.markdown | 41 + .../docs/_posts/2016-02-25-rocksdb-ama.markdown | 20 + .../2016-03-07-rocksdb-options-file.markdown | 24 + .../2016-04-26-rocksdb-4-5-1-released.markdown | 60 + .../2016-07-26-rocksdb-4-8-released.markdown | 48 + .../2016-09-28-rocksdb-4-11-2-released.markdown | 49 + .../2017-01-06-rocksdb-5-0-1-released.markdown | 26 + .../2017-02-07-rocksdb-5-1-2-released.markdown | 15 + .../2017-02-17-bulkoad-ingest-sst-file.markdown | 50 + .../2017-03-02-rocksdb-5-2-1-released.markdown | 22 + .../2017-05-12-partitioned-index-filter.markdown | 34 + .../_posts/2017-05-14-core-local-stats.markdown | 106 + .../2017-05-26-rocksdb-5-4-5-released.markdown | 39 + .../2017-06-26-17-level-based-changes.markdown | 60 + .../2017-06-29-rocksdb-5-5-1-released.markdown | 22 + 
.../2017-07-25-rocksdb-5-6-1-released.markdown | 22 + .../docs/_posts/2017-08-24-pinnableslice.markdown | 37 + .../docs/_posts/2017-08-25-flushwal.markdown | 26 + .../2017-09-28-rocksdb-5-8-released.markdown | 25 + .../2017-12-18-17-auto-tuned-rate-limiter.markdown | 28 + .../_posts/2017-12-19-write-prepared-txn.markdown | 41 + .../2018-02-05-rocksdb-5-10-2-released.markdown | 22 + .../2018-08-01-rocksdb-tuning-advisor.markdown | 58 + .../2018-08-23-data-block-hash-index.markdown | 118 + .../docs/_posts/2018-11-21-delete-range.markdown | 292 + .../_posts/2019-03-08-format-version-4.markdown | 36 + .../_posts/2019-08-15-unordered-write.markdown | 56 + storage/rocksdb/rocksdb/docs/_sass/_base.scss | 492 + storage/rocksdb/rocksdb/docs/_sass/_blog.scss | 47 + storage/rocksdb/rocksdb/docs/_sass/_buttons.scss | 47 + storage/rocksdb/rocksdb/docs/_sass/_footer.scss | 82 + storage/rocksdb/rocksdb/docs/_sass/_gridBlock.scss | 115 + storage/rocksdb/rocksdb/docs/_sass/_header.scss | 138 + storage/rocksdb/rocksdb/docs/_sass/_poweredby.scss | 69 + storage/rocksdb/rocksdb/docs/_sass/_promo.scss | 55 + .../rocksdb/docs/_sass/_react_docs_nav.scss | 332 + .../rocksdb/docs/_sass/_react_header_nav.scss | 141 + storage/rocksdb/rocksdb/docs/_sass/_reset.scss | 43 + storage/rocksdb/rocksdb/docs/_sass/_search.scss | 142 + storage/rocksdb/rocksdb/docs/_sass/_slideshow.scss | 48 + .../rocksdb/docs/_sass/_syntax-highlighting.scss | 129 + storage/rocksdb/rocksdb/docs/_sass/_tables.scss | 47 + storage/rocksdb/rocksdb/docs/_top-level/support.md | 22 + storage/rocksdb/rocksdb/docs/blog/all.html | 20 + storage/rocksdb/rocksdb/docs/blog/index.html | 12 + storage/rocksdb/rocksdb/docs/css/main.scss | 149 + .../2016-04-07-blog-post-example.md | 21 + .../docs/doc-type-examples/docs-hello-world.md | 12 + .../docs/doc-type-examples/top-level-example.md | 8 + storage/rocksdb/rocksdb/docs/docs/index.html | 6 + storage/rocksdb/rocksdb/docs/feed.xml | 30 + storage/rocksdb/rocksdb/docs/index.md | 9 + 
storage/rocksdb/rocksdb/docs/static/favicon.png | Bin 0 -> 3927 bytes .../rocksdb/docs/static/fonts/LatoLatin-Black.woff | Bin 0 -> 70460 bytes .../docs/static/fonts/LatoLatin-Black.woff2 | Bin 0 -> 43456 bytes .../docs/static/fonts/LatoLatin-BlackItalic.woff | Bin 0 -> 72372 bytes .../docs/static/fonts/LatoLatin-BlackItalic.woff2 | Bin 0 -> 44316 bytes .../docs/static/fonts/LatoLatin-Italic.woff | Bin 0 -> 74708 bytes .../docs/static/fonts/LatoLatin-Italic.woff2 | Bin 0 -> 45388 bytes .../rocksdb/docs/static/fonts/LatoLatin-Light.woff | Bin 0 -> 72604 bytes .../docs/static/fonts/LatoLatin-Light.woff2 | Bin 0 -> 43468 bytes .../docs/static/fonts/LatoLatin-Regular.woff | Bin 0 -> 72456 bytes .../docs/static/fonts/LatoLatin-Regular.woff2 | Bin 0 -> 43760 bytes .../images/Resize-of-20140327_200754-300x225.jpg | Bin 0 -> 26670 bytes .../rocksdb/docs/static/images/binaryseek.png | Bin 0 -> 68892 bytes .../rocksdb/docs/static/images/bloom_fp_vs_bpk.png | Bin 0 -> 51924 bytes .../docs/static/images/compaction/full-range.png | Bin 0 -> 193353 bytes .../static/images/compaction/l0-l1-contend.png | Bin 0 -> 203828 bytes .../static/images/compaction/l1-l2-contend.png | Bin 0 -> 230195 bytes .../static/images/compaction/part-range-old.png | Bin 0 -> 165547 bytes .../block-format-binary-seek.png | Bin 0 -> 68892 bytes .../block-format-hash-index.png | Bin 0 -> 31288 bytes .../hash-index-data-structure.png | Bin 0 -> 84389 bytes .../data-block-hash-index/perf-cache-miss.png | Bin 0 -> 44540 bytes .../data-block-hash-index/perf-throughput.png | Bin 0 -> 35170 bytes .../static/images/delrange/delrange_collapsed.png | Bin 0 -> 29265 bytes .../static/images/delrange/delrange_key_schema.png | Bin 0 -> 55178 bytes .../static/images/delrange/delrange_sst_blocks.png | Bin 0 -> 25596 bytes .../images/delrange/delrange_uncollapsed.png | Bin 0 -> 25358 bytes .../static/images/delrange/delrange_write_path.png | Bin 0 -> 109609 bytes .../docs/static/images/pcache-blockindex.jpg | Bin 0 -> 
55324 bytes .../docs/static/images/pcache-fileindex.jpg | Bin 0 -> 54922 bytes .../docs/static/images/pcache-filelayout.jpg | Bin 0 -> 47197 bytes .../docs/static/images/pcache-readiopath.jpg | Bin 0 -> 16381 bytes .../docs/static/images/pcache-tieredstorage.jpg | Bin 0 -> 78208 bytes .../docs/static/images/pcache-writeiopath.jpg | Bin 0 -> 22616 bytes .../rocksdb/docs/static/images/promo-adapt.svg | 8 + .../rocksdb/docs/static/images/promo-flash.svg | 28 + .../docs/static/images/promo-operations.svg | 6 + .../docs/static/images/promo-performance.svg | 134 + .../rate-limiter/auto-tuned-write-KBps-series.png | Bin 0 -> 176624 bytes .../static/images/rate-limiter/write-KBps-cdf.png | Bin 0 -> 80439 bytes .../images/rate-limiter/write-KBps-series.png | Bin 0 -> 310422 bytes .../rocksdb/docs/static/images/tree_example1.png | Bin 0 -> 17804 bytes storage/rocksdb/rocksdb/docs/static/logo.svg | 76 + storage/rocksdb/rocksdb/docs/static/og_image.png | Bin 0 -> 17639 bytes .../rocksdb/rocksdb/env/composite_env_wrapper.h | 1117 + storage/rocksdb/rocksdb/env/env.cc | 475 + storage/rocksdb/rocksdb/env/env_basic_test.cc | 354 + storage/rocksdb/rocksdb/env/env_chroot.cc | 321 + storage/rocksdb/rocksdb/env/env_chroot.h | 22 + storage/rocksdb/rocksdb/env/env_encryption.cc | 937 + storage/rocksdb/rocksdb/env/env_hdfs.cc | 636 + storage/rocksdb/rocksdb/env/env_posix.cc | 527 + storage/rocksdb/rocksdb/env/env_test.cc | 1895 ++ storage/rocksdb/rocksdb/env/file_system.cc | 110 + storage/rocksdb/rocksdb/env/fs_posix.cc | 913 + storage/rocksdb/rocksdb/env/io_posix.cc | 1352 ++ storage/rocksdb/rocksdb/env/io_posix.h | 326 + storage/rocksdb/rocksdb/env/mock_env.cc | 774 + storage/rocksdb/rocksdb/env/mock_env.h | 114 + storage/rocksdb/rocksdb/env/mock_env_test.cc | 85 + storage/rocksdb/rocksdb/examples/.gitignore | 9 + storage/rocksdb/rocksdb/examples/Makefile | 53 + storage/rocksdb/rocksdb/examples/README.md | 2 + .../rocksdb/rocksdb/examples/c_simple_example.c | 79 + 
.../rocksdb/examples/column_families_example.cc | 72 + .../rocksdb/examples/compact_files_example.cc | 171 + .../rocksdb/examples/compaction_filter_example.cc | 88 + .../rocksdb/examples/multi_processes_example.cc | 395 + .../examples/optimistic_transaction_example.cc | 180 + .../rocksdb/examples/options_file_example.cc | 113 + .../examples/rocksdb_option_file_example.ini | 144 + storage/rocksdb/rocksdb/examples/simple_example.cc | 83 + .../rocksdb/examples/transaction_example.cc | 186 + storage/rocksdb/rocksdb/file/delete_scheduler.cc | 357 + storage/rocksdb/rocksdb/file/delete_scheduler.h | 141 + .../rocksdb/rocksdb/file/delete_scheduler_test.cc | 693 + .../rocksdb/rocksdb/file/file_prefetch_buffer.cc | 136 + .../rocksdb/rocksdb/file/file_prefetch_buffer.h | 97 + storage/rocksdb/rocksdb/file/file_util.cc | 124 + storage/rocksdb/rocksdb/file/file_util.h | 33 + storage/rocksdb/rocksdb/file/filename.cc | 456 + storage/rocksdb/rocksdb/file/filename.h | 185 + .../rocksdb/file/random_access_file_reader.cc | 189 + .../rocksdb/file/random_access_file_reader.h | 120 + storage/rocksdb/rocksdb/file/read_write_util.cc | 67 + storage/rocksdb/rocksdb/file/read_write_util.h | 34 + storage/rocksdb/rocksdb/file/readahead_raf.cc | 162 + storage/rocksdb/rocksdb/file/readahead_raf.h | 27 + .../rocksdb/rocksdb/file/sequence_file_reader.cc | 237 + .../rocksdb/rocksdb/file/sequence_file_reader.h | 67 + .../rocksdb/rocksdb/file/sst_file_manager_impl.cc | 558 + .../rocksdb/rocksdb/file/sst_file_manager_impl.h | 197 + .../rocksdb/rocksdb/file/writable_file_writer.cc | 429 + .../rocksdb/rocksdb/file/writable_file_writer.h | 171 + storage/rocksdb/rocksdb/hdfs/README | 23 + storage/rocksdb/rocksdb/hdfs/env_hdfs.h | 384 + storage/rocksdb/rocksdb/hdfs/setup.sh | 9 + .../rocksdb/include/rocksdb/advanced_options.h | 731 + storage/rocksdb/rocksdb/include/rocksdb/c.h | 1801 ++ storage/rocksdb/rocksdb/include/rocksdb/cache.h | 278 + .../rocksdb/rocksdb/include/rocksdb/cleanable.h | 71 + 
.../rocksdb/include/rocksdb/compaction_filter.h | 212 + .../rocksdb/include/rocksdb/compaction_job_stats.h | 96 + .../rocksdb/rocksdb/include/rocksdb/comparator.h | 122 + .../include/rocksdb/concurrent_task_limiter.h | 46 + .../rocksdb/rocksdb/include/rocksdb/convenience.h | 351 + storage/rocksdb/rocksdb/include/rocksdb/db.h | 1525 ++ .../rocksdb/include/rocksdb/db_bench_tool.h | 11 + .../rocksdb/rocksdb/include/rocksdb/db_dump_tool.h | 45 + .../rocksdb/include/rocksdb/db_stress_tool.h | 11 + storage/rocksdb/rocksdb/include/rocksdb/env.h | 1589 ++ .../rocksdb/include/rocksdb/env_encryption.h | 206 + .../rocksdb/rocksdb/include/rocksdb/experimental.h | 29 + .../rocksdb/include/rocksdb/file_checksum.h | 86 + .../rocksdb/rocksdb/include/rocksdb/file_system.h | 1358 ++ .../rocksdb/include/rocksdb/filter_policy.h | 200 + .../rocksdb/include/rocksdb/flush_block_policy.h | 61 + .../rocksdb/rocksdb/include/rocksdb/io_status.h | 232 + .../rocksdb/include/rocksdb/iostats_context.h | 56 + storage/rocksdb/rocksdb/include/rocksdb/iterator.h | 119 + storage/rocksdb/rocksdb/include/rocksdb/ldb_tool.h | 43 + storage/rocksdb/rocksdb/include/rocksdb/listener.h | 491 + .../rocksdb/include/rocksdb/memory_allocator.h | 77 + .../rocksdb/rocksdb/include/rocksdb/memtablerep.h | 385 + .../rocksdb/include/rocksdb/merge_operator.h | 257 + storage/rocksdb/rocksdb/include/rocksdb/metadata.h | 151 + storage/rocksdb/rocksdb/include/rocksdb/options.h | 1587 ++ .../rocksdb/rocksdb/include/rocksdb/perf_context.h | 232 + .../rocksdb/rocksdb/include/rocksdb/perf_level.h | 35 + .../rocksdb/include/rocksdb/persistent_cache.h | 67 + .../rocksdb/rocksdb/include/rocksdb/rate_limiter.h | 139 + .../rocksdb/include/rocksdb/rocksdb_namespace.h | 10 + storage/rocksdb/rocksdb/include/rocksdb/slice.h | 269 + .../rocksdb/include/rocksdb/slice_transform.h | 103 + storage/rocksdb/rocksdb/include/rocksdb/snapshot.h | 48 + .../rocksdb/include/rocksdb/sst_dump_tool.h | 19 + 
.../rocksdb/include/rocksdb/sst_file_manager.h | 132 + .../rocksdb/include/rocksdb/sst_file_reader.h | 47 + .../rocksdb/include/rocksdb/sst_file_writer.h | 139 + .../rocksdb/rocksdb/include/rocksdb/statistics.h | 548 + .../rocksdb/include/rocksdb/stats_history.h | 69 + storage/rocksdb/rocksdb/include/rocksdb/status.h | 386 + storage/rocksdb/rocksdb/include/rocksdb/table.h | 607 + .../rocksdb/include/rocksdb/table_properties.h | 250 + .../rocksdb/include/rocksdb/thread_status.h | 188 + .../rocksdb/rocksdb/include/rocksdb/threadpool.h | 58 + .../rocksdb/include/rocksdb/trace_reader_writer.h | 48 + .../rocksdb/include/rocksdb/transaction_log.h | 121 + storage/rocksdb/rocksdb/include/rocksdb/types.h | 54 + .../rocksdb/include/rocksdb/universal_compaction.h | 86 + .../include/rocksdb/utilities/backupable_db.h | 341 + .../rocksdb/include/rocksdb/utilities/checkpoint.h | 57 + .../include/rocksdb/utilities/convenience.h | 10 + .../rocksdb/include/rocksdb/utilities/db_ttl.h | 72 + .../rocksdb/include/rocksdb/utilities/debug.h | 49 + .../include/rocksdb/utilities/env_librados.h | 175 + .../rocksdb/include/rocksdb/utilities/env_mirror.h | 180 + .../include/rocksdb/utilities/info_log_finder.h | 19 + .../rocksdb/include/rocksdb/utilities/ldb_cmd.h | 277 + .../rocksdb/utilities/ldb_cmd_execute_result.h | 71 + .../include/rocksdb/utilities/leveldb_options.h | 146 + .../utilities/lua/rocks_lua_custom_library.h | 43 + .../include/rocksdb/utilities/lua/rocks_lua_util.h | 55 + .../include/rocksdb/utilities/memory_util.h | 50 + .../include/rocksdb/utilities/object_registry.h | 205 + .../rocksdb/utilities/optimistic_transaction_db.h | 98 + .../rocksdb/utilities/option_change_migration.h | 19 + .../include/rocksdb/utilities/options_util.h | 102 + .../rocksdb/include/rocksdb/utilities/sim_cache.h | 94 + .../include/rocksdb/utilities/stackable_db.h | 465 + .../utilities/table_properties_collectors.h | 74 + .../include/rocksdb/utilities/transaction.h | 540 + 
.../include/rocksdb/utilities/transaction_db.h | 309 + .../rocksdb/utilities/transaction_db_mutex.h | 92 + .../rocksdb/include/rocksdb/utilities/utility_db.h | 34 + .../rocksdb/utilities/write_batch_with_index.h | 278 + storage/rocksdb/rocksdb/include/rocksdb/version.h | 16 + .../rocksdb/rocksdb/include/rocksdb/wal_filter.h | 102 + .../rocksdb/rocksdb/include/rocksdb/write_batch.h | 377 + .../rocksdb/include/rocksdb/write_batch_base.h | 127 + .../rocksdb/include/rocksdb/write_buffer_manager.h | 102 + storage/rocksdb/rocksdb/issue_template.md | 7 + storage/rocksdb/rocksdb/java/CMakeLists.txt | 502 + storage/rocksdb/rocksdb/java/HISTORY-JAVA.md | 86 + storage/rocksdb/rocksdb/java/Makefile | 321 + storage/rocksdb/rocksdb/java/RELEASE.md | 59 + .../java/org/rocksdb/benchmark/DbBenchmark.java | 1653 ++ .../rocksdb/rocksdb/java/crossbuild/Vagrantfile | 51 + .../rocksdb/java/crossbuild/build-linux-alpine.sh | 70 + .../rocksdb/java/crossbuild/build-linux-centos.sh | 38 + .../rocksdb/rocksdb/java/crossbuild/build-linux.sh | 15 + .../java/crossbuild/docker-build-linux-alpine.sh | 18 + .../java/crossbuild/docker-build-linux-centos.sh | 34 + storage/rocksdb/rocksdb/java/jdb_bench.sh | 13 + .../rocksdb/rocksdb/java/jmh/LICENSE-HEADER.txt | 5 + storage/rocksdb/rocksdb/java/jmh/README.md | 18 + storage/rocksdb/rocksdb/java/jmh/pom.xml | 138 + .../java/org/rocksdb/jmh/ComparatorBenchmarks.java | 139 + .../main/java/org/rocksdb/jmh/GetBenchmarks.java | 139 + .../java/org/rocksdb/jmh/MultiGetBenchmarks.java | 158 + .../main/java/org/rocksdb/jmh/PutBenchmarks.java | 112 + .../src/main/java/org/rocksdb/util/FileUtils.java | 59 + .../src/main/java/org/rocksdb/util/KVUtils.java | 58 + storage/rocksdb/rocksdb/java/rocksjni.pom | 150 + .../rocksdb/rocksdb/java/rocksjni/backupablejni.cc | 363 + .../rocksdb/java/rocksjni/backupenginejni.cc | 277 + .../java/rocksjni/cassandra_compactionfilterjni.cc | 24 + .../java/rocksjni/cassandra_value_operator.cc | 48 + 
.../rocksdb/rocksdb/java/rocksjni/checkpoint.cc | 68 + .../rocksdb/rocksdb/java/rocksjni/clock_cache.cc | 40 + .../rocksdb/java/rocksjni/columnfamilyhandle.cc | 72 + .../rocksdb/java/rocksjni/compact_range_options.cc | 211 + .../rocksdb/java/rocksjni/compaction_filter.cc | 28 + .../java/rocksjni/compaction_filter_factory.cc | 40 + .../compaction_filter_factory_jnicallback.cc | 76 + .../compaction_filter_factory_jnicallback.h | 35 + .../rocksdb/java/rocksjni/compaction_job_info.cc | 231 + .../rocksdb/java/rocksjni/compaction_job_stats.cc | 361 + .../rocksdb/java/rocksjni/compaction_options.cc | 116 + .../java/rocksjni/compaction_options_fifo.cc | 81 + .../java/rocksjni/compaction_options_universal.cc | 209 + .../rocksdb/rocksdb/java/rocksjni/comparator.cc | 57 + .../rocksdb/java/rocksjni/comparatorjnicallback.cc | 638 + .../rocksdb/java/rocksjni/comparatorjnicallback.h | 137 + .../rocksdb/java/rocksjni/compression_options.cc | 164 + storage/rocksdb/rocksdb/java/rocksjni/env.cc | 238 + .../rocksdb/rocksdb/java/rocksjni/env_options.cc | 298 + storage/rocksdb/rocksdb/java/rocksjni/filter.cc | 45 + .../java/rocksjni/ingest_external_file_options.cc | 196 + storage/rocksdb/rocksdb/java/rocksjni/iterator.cc | 252 + .../rocksdb/rocksdb/java/rocksjni/jnicallback.cc | 53 + .../rocksdb/rocksdb/java/rocksjni/jnicallback.h | 31 + .../rocksdb/java/rocksjni/loggerjnicallback.cc | 297 + .../rocksdb/java/rocksjni/loggerjnicallback.h | 49 + storage/rocksdb/rocksdb/java/rocksjni/lru_cache.cc | 43 + .../rocksdb/rocksdb/java/rocksjni/memory_util.cc | 107 + .../rocksdb/rocksdb/java/rocksjni/memtablejni.cc | 93 + .../rocksdb/java/rocksjni/merge_operator.cc | 81 + .../rocksjni/native_comparator_wrapper_test.cc | 44 + .../java/rocksjni/optimistic_transaction_db.cc | 284 + .../rocksjni/optimistic_transaction_options.cc | 78 + storage/rocksdb/rocksdb/java/rocksjni/options.cc | 7240 ++++++ .../rocksdb/rocksdb/java/rocksjni/options_util.cc | 134 + .../rocksdb/java/rocksjni/persistent_cache.cc | 
57 + storage/rocksdb/rocksdb/java/rocksjni/portal.h | 7534 +++++++ .../rocksdb/java/rocksjni/ratelimiterjni.cc | 127 + .../remove_emptyvalue_compactionfilterjni.cc | 23 + .../rocksdb/rocksdb/java/rocksjni/restorejni.cc | 40 + .../rocksdb/java/rocksjni/rocks_callback_object.cc | 31 + .../java/rocksjni/rocksdb_exception_test.cc | 82 + storage/rocksdb/rocksdb/java/rocksjni/rocksjni.cc | 3406 +++ storage/rocksdb/rocksdb/java/rocksjni/slice.cc | 360 + storage/rocksdb/rocksdb/java/rocksjni/snapshot.cc | 27 + .../rocksdb/java/rocksjni/sst_file_manager.cc | 247 + .../java/rocksjni/sst_file_reader_iterator.cc | 253 + .../rocksdb/java/rocksjni/sst_file_readerjni.cc | 116 + .../rocksdb/java/rocksjni/sst_file_writerjni.cc | 308 + .../rocksdb/rocksdb/java/rocksjni/statistics.cc | 264 + .../rocksdb/rocksdb/java/rocksjni/statisticsjni.cc | 32 + .../rocksdb/rocksdb/java/rocksjni/statisticsjni.h | 34 + storage/rocksdb/rocksdb/java/rocksjni/table.cc | 150 + .../rocksdb/rocksdb/java/rocksjni/table_filter.cc | 25 + .../java/rocksjni/table_filter_jnicallback.cc | 66 + .../java/rocksjni/table_filter_jnicallback.h | 36 + .../rocksdb/rocksdb/java/rocksjni/thread_status.cc | 125 + .../rocksdb/rocksdb/java/rocksjni/trace_writer.cc | 23 + .../java/rocksjni/trace_writer_jnicallback.cc | 115 + .../java/rocksjni/trace_writer_jnicallback.h | 36 + .../rocksdb/rocksdb/java/rocksjni/transaction.cc | 1646 ++ .../rocksdb/java/rocksjni/transaction_db.cc | 463 + .../java/rocksjni/transaction_db_options.cc | 170 + .../rocksdb/java/rocksjni/transaction_log.cc | 79 + .../rocksdb/java/rocksjni/transaction_notifier.cc | 43 + .../rocksjni/transaction_notifier_jnicallback.cc | 39 + .../rocksjni/transaction_notifier_jnicallback.h | 42 + .../rocksdb/java/rocksjni/transaction_options.cc | 191 + storage/rocksdb/rocksdb/java/rocksjni/ttl.cc | 207 + .../rocksdb/rocksdb/java/rocksjni/wal_filter.cc | 23 + .../java/rocksjni/wal_filter_jnicallback.cc | 144 + .../rocksdb/java/rocksjni/wal_filter_jnicallback.h | 42 + 
.../rocksdb/rocksdb/java/rocksjni/write_batch.cc | 674 + .../rocksdb/java/rocksjni/write_batch_test.cc | 198 + .../java/rocksjni/write_batch_with_index.cc | 862 + .../rocksdb/java/rocksjni/write_buffer_manager.cc | 42 + .../java/rocksjni/writebatchhandlerjnicallback.cc | 548 + .../java/rocksjni/writebatchhandlerjnicallback.h | 89 + .../src/main/java/OptimisticTransactionSample.java | 184 + .../src/main/java/RocksDBColumnFamilySample.java | 78 + .../java/samples/src/main/java/RocksDBSample.java | 303 + .../samples/src/main/java/TransactionSample.java | 183 + .../java/org/rocksdb/AbstractCompactionFilter.java | 59 + .../rocksdb/AbstractCompactionFilterFactory.java | 77 + .../main/java/org/rocksdb/AbstractComparator.java | 124 + .../org/rocksdb/AbstractComparatorJniBridge.java | 125 + .../rocksdb/AbstractImmutableNativeReference.java | 67 + .../java/org/rocksdb/AbstractMutableOptions.java | 256 + .../java/org/rocksdb/AbstractNativeReference.java | 76 + .../java/org/rocksdb/AbstractRocksIterator.java | 126 + .../src/main/java/org/rocksdb/AbstractSlice.java | 191 + .../main/java/org/rocksdb/AbstractTableFilter.java | 20 + .../main/java/org/rocksdb/AbstractTraceWriter.java | 70 + .../org/rocksdb/AbstractTransactionNotifier.java | 54 + .../main/java/org/rocksdb/AbstractWalFilter.java | 49 + .../main/java/org/rocksdb/AbstractWriteBatch.java | 216 + .../java/src/main/java/org/rocksdb/AccessHint.java | 53 + .../AdvancedColumnFamilyOptionsInterface.java | 465 + ...dvancedMutableColumnFamilyOptionsInterface.java | 464 + .../src/main/java/org/rocksdb/BackupEngine.java | 259 + .../java/src/main/java/org/rocksdb/BackupInfo.java | 76 + .../main/java/org/rocksdb/BackupableDBOptions.java | 465 + .../java/org/rocksdb/BlockBasedTableConfig.java | 987 + .../src/main/java/org/rocksdb/BloomFilter.java | 79 + .../main/java/org/rocksdb/BuiltinComparator.java | 20 + .../java/src/main/java/org/rocksdb/Cache.java | 13 + .../org/rocksdb/CassandraCompactionFilter.java | 19 + 
.../org/rocksdb/CassandraValueMergeOperator.java | 25 + .../java/src/main/java/org/rocksdb/Checkpoint.java | 66 + .../src/main/java/org/rocksdb/ChecksumType.java | 39 + .../java/src/main/java/org/rocksdb/ClockCache.java | 59 + .../java/org/rocksdb/ColumnFamilyDescriptor.java | 109 + .../main/java/org/rocksdb/ColumnFamilyHandle.java | 115 + .../java/org/rocksdb/ColumnFamilyMetaData.java | 70 + .../main/java/org/rocksdb/ColumnFamilyOptions.java | 1001 + .../org/rocksdb/ColumnFamilyOptionsInterface.java | 449 + .../main/java/org/rocksdb/CompactRangeOptions.java | 237 + .../main/java/org/rocksdb/CompactionJobInfo.java | 159 + .../main/java/org/rocksdb/CompactionJobStats.java | 295 + .../main/java/org/rocksdb/CompactionOptions.java | 121 + .../java/org/rocksdb/CompactionOptionsFIFO.java | 89 + .../org/rocksdb/CompactionOptionsUniversal.java | 273 + .../main/java/org/rocksdb/CompactionPriority.java | 73 + .../main/java/org/rocksdb/CompactionReason.java | 115 + .../main/java/org/rocksdb/CompactionStopStyle.java | 55 + .../src/main/java/org/rocksdb/CompactionStyle.java | 80 + .../main/java/org/rocksdb/ComparatorOptions.java | 133 + .../src/main/java/org/rocksdb/ComparatorType.java | 48 + .../main/java/org/rocksdb/CompressionOptions.java | 151 + .../src/main/java/org/rocksdb/CompressionType.java | 99 + .../java/src/main/java/org/rocksdb/DBOptions.java | 1403 ++ .../main/java/org/rocksdb/DBOptionsInterface.java | 1564 ++ .../main/java/org/rocksdb/DataBlockIndexType.java | 32 + .../java/src/main/java/org/rocksdb/DbPath.java | 47 + .../src/main/java/org/rocksdb/DirectSlice.java | 132 + .../src/main/java/org/rocksdb/EncodingType.java | 55 + .../java/src/main/java/org/rocksdb/Env.java | 167 + .../java/src/main/java/org/rocksdb/EnvOptions.java | 366 + .../src/main/java/org/rocksdb/Experimental.java | 23 + .../java/src/main/java/org/rocksdb/Filter.java | 36 + .../src/main/java/org/rocksdb/FlushOptions.java | 90 + .../org/rocksdb/HashLinkedListMemTableConfig.java | 174 + 
.../org/rocksdb/HashSkipListMemTableConfig.java | 106 + .../java/src/main/java/org/rocksdb/HdfsEnv.java | 27 + .../src/main/java/org/rocksdb/HistogramData.java | 75 + .../src/main/java/org/rocksdb/HistogramType.java | 198 + .../java/src/main/java/org/rocksdb/Holder.java | 46 + .../java/src/main/java/org/rocksdb/IndexType.java | 41 + .../src/main/java/org/rocksdb/InfoLogLevel.java | 49 + .../org/rocksdb/IngestExternalFileOptions.java | 227 + .../java/src/main/java/org/rocksdb/LRUCache.java | 82 + .../src/main/java/org/rocksdb/LevelMetaData.java | 56 + .../main/java/org/rocksdb/LiveFileMetaData.java | 55 + .../java/src/main/java/org/rocksdb/LogFile.java | 75 + .../java/src/main/java/org/rocksdb/Logger.java | 122 + .../src/main/java/org/rocksdb/MemTableConfig.java | 29 + .../src/main/java/org/rocksdb/MemoryUsageType.java | 72 + .../java/src/main/java/org/rocksdb/MemoryUtil.java | 60 + .../src/main/java/org/rocksdb/MergeOperator.java | 18 + .../org/rocksdb/MutableColumnFamilyOptions.java | 469 + .../MutableColumnFamilyOptionsInterface.java | 158 + .../main/java/org/rocksdb/MutableDBOptions.java | 325 + .../org/rocksdb/MutableDBOptionsInterface.java | 443 + .../main/java/org/rocksdb/MutableOptionKey.java | 16 + .../main/java/org/rocksdb/MutableOptionValue.java | 376 + .../java/org/rocksdb/NativeComparatorWrapper.java | 59 + .../main/java/org/rocksdb/NativeLibraryLoader.java | 125 + .../src/main/java/org/rocksdb/OperationStage.java | 59 + .../src/main/java/org/rocksdb/OperationType.java | 54 + .../java/org/rocksdb/OptimisticTransactionDB.java | 226 + .../org/rocksdb/OptimisticTransactionOptions.java | 53 + .../java/src/main/java/org/rocksdb/Options.java | 2183 ++ .../src/main/java/org/rocksdb/OptionsUtil.java | 142 + .../src/main/java/org/rocksdb/PersistentCache.java | 26 + .../main/java/org/rocksdb/PlainTableConfig.java | 251 + .../java/src/main/java/org/rocksdb/Priority.java | 49 + .../java/src/main/java/org/rocksdb/Range.java | 19 + 
.../src/main/java/org/rocksdb/RateLimiter.java | 227 + .../src/main/java/org/rocksdb/RateLimiterMode.java | 52 + .../src/main/java/org/rocksdb/ReadOptions.java | 622 + .../java/src/main/java/org/rocksdb/ReadTier.java | 49 + .../rocksdb/RemoveEmptyValueCompactionFilter.java | 18 + .../src/main/java/org/rocksdb/RestoreOptions.java | 32 + .../org/rocksdb/ReusedSynchronisationType.java | 65 + .../main/java/org/rocksdb/RocksCallbackObject.java | 50 + .../java/src/main/java/org/rocksdb/RocksDB.java | 4522 ++++ .../main/java/org/rocksdb/RocksDBException.java | 44 + .../java/src/main/java/org/rocksdb/RocksEnv.java | 32 + .../src/main/java/org/rocksdb/RocksIterator.java | 118 + .../java/org/rocksdb/RocksIteratorInterface.java | 117 + .../src/main/java/org/rocksdb/RocksMemEnv.java | 39 + .../main/java/org/rocksdb/RocksMutableObject.java | 87 + .../src/main/java/org/rocksdb/RocksObject.java | 41 + .../java/org/rocksdb/SizeApproximationFlag.java | 31 + .../java/org/rocksdb/SkipListMemTableConfig.java | 51 + .../java/src/main/java/org/rocksdb/Slice.java | 136 + .../java/src/main/java/org/rocksdb/Snapshot.java | 41 + .../src/main/java/org/rocksdb/SstFileManager.java | 251 + .../src/main/java/org/rocksdb/SstFileMetaData.java | 162 + .../src/main/java/org/rocksdb/SstFileReader.java | 82 + .../java/org/rocksdb/SstFileReaderIterator.java | 120 + .../src/main/java/org/rocksdb/SstFileWriter.java | 290 + .../java/src/main/java/org/rocksdb/StateType.java | 53 + .../java/src/main/java/org/rocksdb/Statistics.java | 152 + .../main/java/org/rocksdb/StatisticsCollector.java | 111 + .../org/rocksdb/StatisticsCollectorCallback.java | 32 + .../main/java/org/rocksdb/StatsCollectorInput.java | 35 + .../java/src/main/java/org/rocksdb/StatsLevel.java | 65 + .../java/src/main/java/org/rocksdb/Status.java | 138 + .../java/org/rocksdb/StringAppendOperator.java | 24 + .../src/main/java/org/rocksdb/TableFilter.java | 21 + .../main/java/org/rocksdb/TableFormatConfig.java | 22 + 
.../src/main/java/org/rocksdb/TableProperties.java | 366 + .../src/main/java/org/rocksdb/ThreadStatus.java | 224 + .../java/src/main/java/org/rocksdb/ThreadType.java | 65 + .../java/src/main/java/org/rocksdb/TickerType.java | 760 + .../java/src/main/java/org/rocksdb/TimedEnv.java | 30 + .../src/main/java/org/rocksdb/TraceOptions.java | 32 + .../src/main/java/org/rocksdb/TraceWriter.java | 36 + .../src/main/java/org/rocksdb/Transaction.java | 2012 ++ .../src/main/java/org/rocksdb/TransactionDB.java | 404 + .../java/org/rocksdb/TransactionDBOptions.java | 217 + .../java/org/rocksdb/TransactionLogIterator.java | 112 + .../main/java/org/rocksdb/TransactionOptions.java | 189 + .../src/main/java/org/rocksdb/TransactionalDB.java | 68 + .../java/org/rocksdb/TransactionalOptions.java | 31 + .../java/src/main/java/org/rocksdb/TtlDB.java | 245 + .../main/java/org/rocksdb/TxnDBWritePolicy.java | 62 + .../main/java/org/rocksdb/UInt64AddOperator.java | 19 + .../java/org/rocksdb/VectorMemTableConfig.java | 46 + .../src/main/java/org/rocksdb/WALRecoveryMode.java | 83 + .../main/java/org/rocksdb/WBWIRocksIterator.java | 197 + .../src/main/java/org/rocksdb/WalFileType.java | 55 + .../java/src/main/java/org/rocksdb/WalFilter.java | 87 + .../main/java/org/rocksdb/WalProcessingOption.java | 54 + .../java/src/main/java/org/rocksdb/WriteBatch.java | 394 + .../main/java/org/rocksdb/WriteBatchInterface.java | 305 + .../main/java/org/rocksdb/WriteBatchWithIndex.java | 318 + .../main/java/org/rocksdb/WriteBufferManager.java | 33 + .../src/main/java/org/rocksdb/WriteOptions.java | 219 + .../src/main/java/org/rocksdb/util/ByteUtil.java | 46 + .../java/org/rocksdb/util/BytewiseComparator.java | 121 + .../main/java/org/rocksdb/util/Environment.java | 152 + .../main/java/org/rocksdb/util/IntComparator.java | 67 + .../rocksdb/util/ReverseBytewiseComparator.java | 88 + .../src/main/java/org/rocksdb/util/SizeUnit.java | 16 + .../java/org/rocksdb/AbstractTransactionTest.java | 902 + 
.../test/java/org/rocksdb/BackupEngineTest.java | 261 + .../java/org/rocksdb/BackupableDBOptionsTest.java | 351 + .../org/rocksdb/BlockBasedTableConfigTest.java | 393 + .../java/org/rocksdb/BuiltinComparatorTest.java | 145 + .../src/test/java/org/rocksdb/CheckPointTest.java | 83 + .../src/test/java/org/rocksdb/ClockCacheTest.java | 26 + .../java/org/rocksdb/ColumnFamilyOptionsTest.java | 625 + .../test/java/org/rocksdb/ColumnFamilyTest.java | 734 + .../java/org/rocksdb/CompactRangeOptionsTest.java | 98 + .../org/rocksdb/CompactionFilterFactoryTest.java | 68 + .../java/org/rocksdb/CompactionJobInfoTest.java | 114 + .../java/org/rocksdb/CompactionJobStatsTest.java | 196 + .../org/rocksdb/CompactionOptionsFIFOTest.java | 35 + .../java/org/rocksdb/CompactionOptionsTest.java | 52 + .../rocksdb/CompactionOptionsUniversalTest.java | 80 + .../java/org/rocksdb/CompactionPriorityTest.java | 31 + .../java/org/rocksdb/CompactionStopStyleTest.java | 31 + .../java/org/rocksdb/ComparatorOptionsTest.java | 58 + .../java/org/rocksdb/CompressionOptionsTest.java | 71 + .../java/org/rocksdb/CompressionTypesTest.java | 20 + .../src/test/java/org/rocksdb/DBOptionsTest.java | 813 + .../src/test/java/org/rocksdb/DefaultEnvTest.java | 113 + .../src/test/java/org/rocksdb/DirectSliceTest.java | 93 + .../src/test/java/org/rocksdb/EnvOptionsTest.java | 145 + .../java/src/test/java/org/rocksdb/FilterTest.java | 39 + .../test/java/org/rocksdb/FlushOptionsTest.java | 31 + .../java/src/test/java/org/rocksdb/FlushTest.java | 49 + .../src/test/java/org/rocksdb/HdfsEnvTest.java | 45 + .../test/java/org/rocksdb/InfoLogLevelTest.java | 109 + .../org/rocksdb/IngestExternalFileOptionsTest.java | 107 + .../src/test/java/org/rocksdb/KeyMayExistTest.java | 192 + .../src/test/java/org/rocksdb/LRUCacheTest.java | 27 + .../java/src/test/java/org/rocksdb/LoggerTest.java | 239 + .../src/test/java/org/rocksdb/MemTableTest.java | 111 + .../src/test/java/org/rocksdb/MemoryUtilTest.java | 143 + 
.../java/src/test/java/org/rocksdb/MergeTest.java | 440 + .../test/java/org/rocksdb/MixedOptionsTest.java | 55 + .../rocksdb/MutableColumnFamilyOptionsTest.java | 88 + .../java/org/rocksdb/MutableDBOptionsTest.java | 85 + .../org/rocksdb/NativeComparatorWrapperTest.java | 92 + .../java/org/rocksdb/NativeLibraryLoaderTest.java | 41 + .../org/rocksdb/OptimisticTransactionDBTest.java | 131 + .../rocksdb/OptimisticTransactionOptionsTest.java | 38 + .../org/rocksdb/OptimisticTransactionTest.java | 350 + .../src/test/java/org/rocksdb/OptionsTest.java | 1311 ++ .../src/test/java/org/rocksdb/OptionsUtilTest.java | 126 + .../java/org/rocksdb/PlainTableConfigTest.java | 89 + .../java/org/rocksdb/PlatformRandomHelper.java | 58 + .../src/test/java/org/rocksdb/RateLimiterTest.java | 65 + .../src/test/java/org/rocksdb/ReadOnlyTest.java | 305 + .../src/test/java/org/rocksdb/ReadOptionsTest.java | 323 + .../java/org/rocksdb/RocksDBExceptionTest.java | 115 + .../src/test/java/org/rocksdb/RocksDBTest.java | 1669 ++ .../test/java/org/rocksdb/RocksIteratorTest.java | 203 + .../src/test/java/org/rocksdb/RocksMemEnvTest.java | 146 + .../org/rocksdb/RocksNativeLibraryResource.java | 18 + .../java/src/test/java/org/rocksdb/SliceTest.java | 80 + .../src/test/java/org/rocksdb/SnapshotTest.java | 169 + .../test/java/org/rocksdb/SstFileManagerTest.java | 66 + .../test/java/org/rocksdb/SstFileReaderTest.java | 155 + .../test/java/org/rocksdb/SstFileWriterTest.java | 241 + .../java/org/rocksdb/StatisticsCollectorTest.java | 55 + .../src/test/java/org/rocksdb/StatisticsTest.java | 168 + .../test/java/org/rocksdb/StatsCallbackMock.java | 20 + .../src/test/java/org/rocksdb/TableFilterTest.java | 106 + .../src/test/java/org/rocksdb/TimedEnvTest.java | 43 + .../java/org/rocksdb/TransactionDBOptionsTest.java | 64 + .../test/java/org/rocksdb/TransactionDBTest.java | 178 + .../org/rocksdb/TransactionLogIteratorTest.java | 139 + .../java/org/rocksdb/TransactionOptionsTest.java | 72 + 
.../src/test/java/org/rocksdb/TransactionTest.java | 308 + .../java/src/test/java/org/rocksdb/TtlDBTest.java | 112 + .../java/src/test/java/org/rocksdb/Types.java | 43 + .../test/java/org/rocksdb/WALRecoveryModeTest.java | 22 + .../src/test/java/org/rocksdb/WalFilterTest.java | 165 + .../java/org/rocksdb/WriteBatchHandlerTest.java | 76 + .../src/test/java/org/rocksdb/WriteBatchTest.java | 528 + .../java/org/rocksdb/WriteBatchThreadedTest.java | 104 + .../java/org/rocksdb/WriteBatchWithIndexTest.java | 566 + .../test/java/org/rocksdb/WriteOptionsTest.java | 69 + .../RemoveEmptyValueCompactionFilterFactory.java | 21 + .../java/org/rocksdb/test/RocksJunitRunner.java | 174 + .../rocksdb/util/BytewiseComparatorIntTest.java | 267 + .../org/rocksdb/util/BytewiseComparatorTest.java | 519 + .../rocksdb/util/CapturingWriteBatchHandler.java | 172 + .../java/org/rocksdb/util/EnvironmentTest.java | 259 + .../java/org/rocksdb/util/IntComparatorTest.java | 266 + .../java/org/rocksdb/util/JNIComparatorTest.java | 174 + .../util/ReverseBytewiseComparatorIntTest.java | 270 + .../test/java/org/rocksdb/util/SizeUnitTest.java | 27 + .../src/test/java/org/rocksdb/util/TestUtil.java | 61 + .../java/org/rocksdb/util/WriteBatchGetter.java | 134 + .../rocksdb/rocksdb/logging/auto_roll_logger.cc | 292 + storage/rocksdb/rocksdb/logging/auto_roll_logger.h | 164 + .../rocksdb/logging/auto_roll_logger_test.cc | 685 + storage/rocksdb/rocksdb/logging/env_logger.h | 165 + storage/rocksdb/rocksdb/logging/env_logger_test.cc | 162 + storage/rocksdb/rocksdb/logging/event_logger.cc | 71 + storage/rocksdb/rocksdb/logging/event_logger.h | 203 + .../rocksdb/rocksdb/logging/event_logger_test.cc | 43 + storage/rocksdb/rocksdb/logging/log_buffer.cc | 92 + storage/rocksdb/rocksdb/logging/log_buffer.h | 56 + storage/rocksdb/rocksdb/logging/logging.h | 68 + storage/rocksdb/rocksdb/logging/posix_logger.h | 185 + storage/rocksdb/rocksdb/memory/allocator.h | 57 + storage/rocksdb/rocksdb/memory/arena.cc | 233 + 
storage/rocksdb/rocksdb/memory/arena.h | 141 + storage/rocksdb/rocksdb/memory/arena_test.cc | 204 + storage/rocksdb/rocksdb/memory/concurrent_arena.cc | 47 + storage/rocksdb/rocksdb/memory/concurrent_arena.h | 215 + .../rocksdb/memory/jemalloc_nodump_allocator.cc | 206 + .../rocksdb/memory/jemalloc_nodump_allocator.h | 78 + storage/rocksdb/rocksdb/memory/memory_allocator.h | 38 + storage/rocksdb/rocksdb/memory/memory_usage.h | 25 + storage/rocksdb/rocksdb/memtable/alloc_tracker.cc | 62 + .../rocksdb/rocksdb/memtable/hash_linklist_rep.cc | 844 + .../rocksdb/rocksdb/memtable/hash_linklist_rep.h | 49 + .../rocksdb/rocksdb/memtable/hash_skiplist_rep.cc | 349 + .../rocksdb/rocksdb/memtable/hash_skiplist_rep.h | 44 + storage/rocksdb/rocksdb/memtable/inlineskiplist.h | 997 + .../rocksdb/memtable/inlineskiplist_test.cc | 663 + .../rocksdb/rocksdb/memtable/memtablerep_bench.cc | 678 + storage/rocksdb/rocksdb/memtable/skiplist.h | 496 + storage/rocksdb/rocksdb/memtable/skiplist_test.cc | 388 + storage/rocksdb/rocksdb/memtable/skiplistrep.cc | 280 + storage/rocksdb/rocksdb/memtable/stl_wrappers.h | 33 + storage/rocksdb/rocksdb/memtable/vectorrep.cc | 301 + .../rocksdb/memtable/write_buffer_manager.cc | 130 + .../rocksdb/memtable/write_buffer_manager_test.cc | 155 + .../rocksdb/rocksdb/monitoring/file_read_sample.h | 23 + storage/rocksdb/rocksdb/monitoring/histogram.cc | 288 + storage/rocksdb/rocksdb/monitoring/histogram.h | 149 + .../rocksdb/rocksdb/monitoring/histogram_test.cc | 221 + .../rocksdb/monitoring/histogram_windowing.cc | 202 + .../rocksdb/monitoring/histogram_windowing.h | 80 + .../rocksdb/monitoring/in_memory_stats_history.cc | 49 + .../rocksdb/monitoring/in_memory_stats_history.h | 74 + .../rocksdb/monitoring/instrumented_mutex.cc | 69 + .../rocksdb/monitoring/instrumented_mutex.h | 98 + .../rocksdb/rocksdb/monitoring/iostats_context.cc | 62 + .../rocksdb/monitoring/iostats_context_imp.h | 60 + .../rocksdb/monitoring/iostats_context_test.cc | 29 + 
storage/rocksdb/rocksdb/monitoring/perf_context.cc | 559 + .../rocksdb/rocksdb/monitoring/perf_context_imp.h | 97 + storage/rocksdb/rocksdb/monitoring/perf_level.cc | 28 + .../rocksdb/rocksdb/monitoring/perf_level_imp.h | 18 + .../rocksdb/rocksdb/monitoring/perf_step_timer.h | 79 + .../rocksdb/monitoring/persistent_stats_history.cc | 170 + .../rocksdb/monitoring/persistent_stats_history.h | 83 + storage/rocksdb/rocksdb/monitoring/statistics.cc | 406 + storage/rocksdb/rocksdb/monitoring/statistics.h | 138 + .../rocksdb/rocksdb/monitoring/statistics_test.cc | 47 + .../rocksdb/monitoring/stats_history_test.cc | 653 + .../rocksdb/monitoring/thread_status_impl.cc | 163 + .../rocksdb/monitoring/thread_status_updater.cc | 314 + .../rocksdb/monitoring/thread_status_updater.h | 233 + .../monitoring/thread_status_updater_debug.cc | 42 + .../rocksdb/monitoring/thread_status_util.cc | 206 + .../rocksdb/monitoring/thread_status_util.h | 134 + .../rocksdb/monitoring/thread_status_util_debug.cc | 32 + storage/rocksdb/rocksdb/options/cf_options.cc | 231 + storage/rocksdb/rocksdb/options/cf_options.h | 269 + storage/rocksdb/rocksdb/options/db_options.cc | 333 + storage/rocksdb/rocksdb/options/db_options.h | 118 + storage/rocksdb/rocksdb/options/options.cc | 623 + storage/rocksdb/rocksdb/options/options_helper.cc | 2124 ++ storage/rocksdb/rocksdb/options/options_helper.h | 233 + storage/rocksdb/rocksdb/options/options_parser.cc | 839 + storage/rocksdb/rocksdb/options/options_parser.h | 147 + .../rocksdb/options/options_sanity_check.cc | 38 + .../rocksdb/rocksdb/options/options_sanity_check.h | 50 + .../rocksdb/options/options_settable_test.cc | 492 + storage/rocksdb/rocksdb/options/options_test.cc | 2004 ++ storage/rocksdb/rocksdb/port/README | 10 + storage/rocksdb/rocksdb/port/jemalloc_helper.h | 77 + storage/rocksdb/rocksdb/port/likely.h | 18 + storage/rocksdb/rocksdb/port/malloc.h | 17 + storage/rocksdb/rocksdb/port/port.h | 21 + storage/rocksdb/rocksdb/port/port_dirent.h | 44 + 
storage/rocksdb/rocksdb/port/port_example.h | 101 + storage/rocksdb/rocksdb/port/port_posix.cc | 234 + storage/rocksdb/rocksdb/port/port_posix.h | 218 + storage/rocksdb/rocksdb/port/stack_trace.cc | 138 + storage/rocksdb/rocksdb/port/stack_trace.h | 22 + storage/rocksdb/rocksdb/port/sys_time.h | 47 + storage/rocksdb/rocksdb/port/util_logger.h | 20 + storage/rocksdb/rocksdb/port/win/env_default.cc | 41 + storage/rocksdb/rocksdb/port/win/env_win.cc | 1540 ++ storage/rocksdb/rocksdb/port/win/env_win.h | 347 + storage/rocksdb/rocksdb/port/win/io_win.cc | 1069 + storage/rocksdb/rocksdb/port/win/io_win.h | 456 + storage/rocksdb/rocksdb/port/win/port_win.cc | 269 + storage/rocksdb/rocksdb/port/win/port_win.h | 398 + storage/rocksdb/rocksdb/port/win/win_jemalloc.cc | 75 + storage/rocksdb/rocksdb/port/win/win_logger.cc | 192 + storage/rocksdb/rocksdb/port/win/win_logger.h | 66 + storage/rocksdb/rocksdb/port/win/win_thread.cc | 179 + storage/rocksdb/rocksdb/port/win/win_thread.h | 122 + storage/rocksdb/rocksdb/port/win/xpress_win.cc | 226 + storage/rocksdb/rocksdb/port/win/xpress_win.h | 27 + storage/rocksdb/rocksdb/port/xpress.h | 17 + storage/rocksdb/rocksdb/src.mk | 527 + .../table/adaptive/adaptive_table_factory.cc | 124 + .../table/adaptive/adaptive_table_factory.h | 63 + storage/rocksdb/rocksdb/table/block_based/block.cc | 1004 + storage/rocksdb/rocksdb/table/block_based/block.h | 631 + .../table/block_based/block_based_filter_block.cc | 347 + .../table/block_based/block_based_filter_block.h | 119 + .../block_based/block_based_filter_block_test.cc | 434 + .../table/block_based/block_based_table_builder.cc | 1217 + .../table/block_based/block_based_table_builder.h | 157 + .../table/block_based/block_based_table_factory.cc | 649 + .../table/block_based/block_based_table_factory.h | 195 + .../table/block_based/block_based_table_reader.cc | 4531 ++++ .../table/block_based/block_based_table_reader.h | 824 + .../rocksdb/table/block_based/block_builder.cc | 196 + 
.../rocksdb/table/block_based/block_builder.h | 75 + .../table/block_based/block_prefix_index.cc | 232 + .../rocksdb/table/block_based/block_prefix_index.h | 66 + .../rocksdb/table/block_based/block_test.cc | 627 + .../rocksdb/rocksdb/table/block_based/block_type.h | 30 + .../rocksdb/table/block_based/cachable_entry.h | 220 + .../rocksdb/table/block_based/data_block_footer.cc | 59 + .../rocksdb/table/block_based/data_block_footer.h | 25 + .../table/block_based/data_block_hash_index.cc | 93 + .../table/block_based/data_block_hash_index.h | 136 + .../block_based/data_block_hash_index_test.cc | 719 + .../rocksdb/table/block_based/filter_block.h | 176 + .../block_based/filter_block_reader_common.cc | 102 + .../table/block_based/filter_block_reader_common.h | 55 + .../rocksdb/table/block_based/filter_policy.cc | 759 + .../table/block_based/filter_policy_internal.h | 142 + .../table/block_based/flush_block_policy.cc | 88 + .../rocksdb/table/block_based/flush_block_policy.h | 41 + .../rocksdb/table/block_based/full_filter_block.cc | 338 + .../rocksdb/table/block_based/full_filter_block.h | 139 + .../table/block_based/full_filter_block_test.cc | 333 + .../rocksdb/table/block_based/index_builder.cc | 222 + .../rocksdb/table/block_based/index_builder.h | 443 + .../table/block_based/mock_block_based_table.h | 56 + .../table/block_based/parsed_full_filter_block.cc | 22 + .../table/block_based/parsed_full_filter_block.h | 40 + .../table/block_based/partitioned_filter_block.cc | 388 + .../table/block_based/partitioned_filter_block.h | 122 + .../block_based/partitioned_filter_block_test.cc | 424 + .../table/block_based/uncompression_dict_reader.cc | 120 + .../table/block_based/uncompression_dict_reader.h | 59 + storage/rocksdb/rocksdb/table/block_fetcher.cc | 284 + storage/rocksdb/rocksdb/table/block_fetcher.h | 109 + storage/rocksdb/rocksdb/table/cleanable_test.cc | 277 + .../rocksdb/table/cuckoo/cuckoo_table_builder.cc | 528 + .../rocksdb/table/cuckoo/cuckoo_table_builder.h | 
136 + .../table/cuckoo/cuckoo_table_builder_test.cc | 662 + .../rocksdb/table/cuckoo/cuckoo_table_factory.cc | 72 + .../rocksdb/table/cuckoo/cuckoo_table_factory.h | 92 + .../rocksdb/table/cuckoo/cuckoo_table_reader.cc | 399 + .../rocksdb/table/cuckoo/cuckoo_table_reader.h | 100 + .../table/cuckoo/cuckoo_table_reader_test.cc | 578 + storage/rocksdb/rocksdb/table/format.cc | 465 + storage/rocksdb/rocksdb/table/format.h | 344 + storage/rocksdb/rocksdb/table/get_context.cc | 366 + storage/rocksdb/rocksdb/table/get_context.h | 191 + storage/rocksdb/rocksdb/table/internal_iterator.h | 182 + storage/rocksdb/rocksdb/table/iter_heap.h | 42 + storage/rocksdb/rocksdb/table/iterator.cc | 210 + storage/rocksdb/rocksdb/table/iterator_wrapper.h | 149 + storage/rocksdb/rocksdb/table/merger_test.cc | 180 + storage/rocksdb/rocksdb/table/merging_iterator.cc | 468 + storage/rocksdb/rocksdb/table/merging_iterator.h | 64 + storage/rocksdb/rocksdb/table/meta_blocks.cc | 525 + storage/rocksdb/rocksdb/table/meta_blocks.h | 152 + storage/rocksdb/rocksdb/table/mock_table.cc | 148 + storage/rocksdb/rocksdb/table/mock_table.h | 214 + storage/rocksdb/rocksdb/table/multiget_context.h | 259 + .../rocksdb/table/persistent_cache_helper.cc | 113 + .../rocksdb/table/persistent_cache_helper.h | 44 + .../rocksdb/table/persistent_cache_options.h | 34 + .../rocksdb/table/plain/plain_table_bloom.cc | 78 + .../rocksdb/table/plain/plain_table_bloom.h | 135 + .../rocksdb/table/plain/plain_table_builder.cc | 314 + .../rocksdb/table/plain/plain_table_builder.h | 151 + .../rocksdb/table/plain/plain_table_factory.cc | 235 + .../rocksdb/table/plain/plain_table_factory.h | 223 + .../rocksdb/table/plain/plain_table_index.cc | 211 + .../rocksdb/table/plain/plain_table_index.h | 249 + .../rocksdb/table/plain/plain_table_key_coding.cc | 498 + .../rocksdb/table/plain/plain_table_key_coding.h | 193 + .../rocksdb/table/plain/plain_table_reader.cc | 775 + .../rocksdb/table/plain/plain_table_reader.h | 246 + 
.../rocksdb/rocksdb/table/scoped_arena_iterator.h | 61 + storage/rocksdb/rocksdb/table/sst_file_reader.cc | 91 + .../rocksdb/rocksdb/table/sst_file_reader_test.cc | 174 + storage/rocksdb/rocksdb/table/sst_file_writer.cc | 319 + .../rocksdb/table/sst_file_writer_collectors.h | 94 + storage/rocksdb/rocksdb/table/table_builder.h | 170 + storage/rocksdb/rocksdb/table/table_properties.cc | 272 + .../rocksdb/table/table_properties_internal.h | 30 + storage/rocksdb/rocksdb/table/table_reader.h | 137 + .../rocksdb/rocksdb/table/table_reader_bench.cc | 347 + .../rocksdb/rocksdb/table/table_reader_caller.h | 39 + storage/rocksdb/rocksdb/table/table_test.cc | 4651 ++++ .../rocksdb/rocksdb/table/two_level_iterator.cc | 211 + storage/rocksdb/rocksdb/table/two_level_iterator.h | 43 + .../rocksdb/test_util/fault_injection_test_env.cc | 437 + .../rocksdb/test_util/fault_injection_test_env.h | 225 + storage/rocksdb/rocksdb/test_util/mock_time_env.h | 45 + storage/rocksdb/rocksdb/test_util/sync_point.cc | 66 + storage/rocksdb/rocksdb/test_util/sync_point.h | 144 + .../rocksdb/rocksdb/test_util/sync_point_impl.cc | 129 + .../rocksdb/rocksdb/test_util/sync_point_impl.h | 74 + storage/rocksdb/rocksdb/test_util/testharness.cc | 56 + storage/rocksdb/rocksdb/test_util/testharness.h | 47 + storage/rocksdb/rocksdb/test_util/testutil.cc | 454 + storage/rocksdb/rocksdb/test_util/testutil.h | 802 + .../rocksdb/test_util/transaction_test_util.cc | 387 + .../rocksdb/test_util/transaction_test_util.h | 132 + .../rocksdb/third-party/folly/folly/CPortability.h | 27 + .../third-party/folly/folly/ConstexprMath.h | 45 + .../third-party/folly/folly/Indestructible.h | 166 + .../rocksdb/third-party/folly/folly/Optional.h | 570 + .../rocksdb/third-party/folly/folly/Portability.h | 84 + .../rocksdb/third-party/folly/folly/ScopeGuard.h | 54 + .../rocksdb/third-party/folly/folly/Traits.h | 152 + .../rocksdb/rocksdb/third-party/folly/folly/Unit.h | 59 + .../rocksdb/third-party/folly/folly/Utility.h | 141 + 
.../third-party/folly/folly/chrono/Hardware.h | 33 + .../third-party/folly/folly/container/Array.h | 74 + .../third-party/folly/folly/detail/Futex-inl.h | 117 + .../third-party/folly/folly/detail/Futex.cpp | 263 + .../rocksdb/third-party/folly/folly/detail/Futex.h | 96 + .../third-party/folly/folly/functional/Invoke.h | 40 + .../rocksdb/third-party/folly/folly/hash/Hash.h | 29 + .../rocksdb/third-party/folly/folly/lang/Align.h | 38 + .../rocksdb/third-party/folly/folly/lang/Bits.h | 30 + .../rocksdb/third-party/folly/folly/lang/Launder.h | 51 + .../third-party/folly/folly/portability/Asm.h | 28 + .../folly/folly/portability/SysSyscall.h | 10 + .../third-party/folly/folly/portability/SysTypes.h | 26 + .../folly/synchronization/AtomicNotification-inl.h | 138 + .../folly/synchronization/AtomicNotification.cpp | 23 + .../folly/synchronization/AtomicNotification.h | 57 + .../folly/folly/synchronization/AtomicUtil-inl.h | 260 + .../folly/folly/synchronization/AtomicUtil.h | 52 + .../folly/folly/synchronization/Baton.h | 327 + .../folly/synchronization/DistributedMutex-inl.h | 1703 ++ .../folly/synchronization/DistributedMutex.cpp | 16 + .../folly/folly/synchronization/DistributedMutex.h | 304 + .../DistributedMutexSpecializations.h | 39 + .../folly/folly/synchronization/ParkingLot.cpp | 26 + .../folly/folly/synchronization/ParkingLot.h | 318 + .../folly/folly/synchronization/WaitOptions.cpp | 12 + .../folly/folly/synchronization/WaitOptions.h | 57 + .../synchronization/detail/InlineFunctionRef.h | 219 + .../synchronization/detail/ProxyLockable-inl.h | 207 + .../folly/synchronization/detail/ProxyLockable.h | 164 + .../folly/folly/synchronization/detail/Sleeper.h | 57 + .../folly/folly/synchronization/detail/Spin.h | 77 + .../synchronization/test/DistributedMutexTest.cpp | 1142 + .../gtest-1.8.1/fused-src/gtest/CMakeLists.txt | 1 + .../gtest-1.8.1/fused-src/gtest/gtest-all.cc | 11394 ++++++++++ .../gtest-1.8.1/fused-src/gtest/gtest.h | 22109 +++++++++++++++++++ 
.../gtest-1.8.1/fused-src/gtest/gtest_main.cc | 37 + storage/rocksdb/rocksdb/thirdparty.inc | 268 + storage/rocksdb/rocksdb/tools/CMakeLists.txt | 21 + storage/rocksdb/rocksdb/tools/Dockerfile | 5 + storage/rocksdb/rocksdb/tools/advisor/README.md | 96 + .../rocksdb/tools/advisor/advisor/__init__.py | 0 .../rocksdb/tools/advisor/advisor/bench_runner.py | 39 + .../advisor/advisor/config_optimizer_example.py | 134 + .../tools/advisor/advisor/db_bench_runner.py | 245 + .../tools/advisor/advisor/db_config_optimizer.py | 282 + .../rocksdb/tools/advisor/advisor/db_log_parser.py | 131 + .../tools/advisor/advisor/db_options_parser.py | 358 + .../tools/advisor/advisor/db_stats_fetcher.py | 338 + .../tools/advisor/advisor/db_timeseries_parser.py | 208 + .../rocksdb/tools/advisor/advisor/ini_parser.py | 76 + .../rocksdb/tools/advisor/advisor/rule_parser.py | 528 + .../tools/advisor/advisor/rule_parser_example.py | 89 + .../rocksdb/tools/advisor/advisor/rules.ini | 214 + .../rocksdb/rocksdb/tools/advisor/test/__init__.py | 0 .../rocksdb/tools/advisor/test/input_files/LOG-0 | 30 + .../rocksdb/tools/advisor/test/input_files/LOG-1 | 25 + .../tools/advisor/test/input_files/OPTIONS-000005 | 49 + .../test/input_files/log_stats_parser_keys_ts | 3 + .../tools/advisor/test/input_files/rules_err1.ini | 56 + .../tools/advisor/test/input_files/rules_err2.ini | 15 + .../tools/advisor/test/input_files/rules_err3.ini | 15 + .../tools/advisor/test/input_files/rules_err4.ini | 15 + .../tools/advisor/test/input_files/test_rules.ini | 47 + .../advisor/test/input_files/triggered_rules.ini | 83 + .../tools/advisor/test/test_db_bench_runner.py | 147 + .../tools/advisor/test/test_db_log_parser.py | 103 + .../tools/advisor/test/test_db_options_parser.py | 216 + .../tools/advisor/test/test_db_stats_fetcher.py | 126 + .../rocksdb/tools/advisor/test/test_rule_parser.py | 234 + .../rocksdb/tools/analyze_txn_stress_test.sh | 77 + storage/rocksdb/rocksdb/tools/auto_sanity_test.sh | 93 + 
storage/rocksdb/rocksdb/tools/benchmark.sh | 525 + storage/rocksdb/rocksdb/tools/benchmark_leveldb.sh | 187 + storage/rocksdb/rocksdb/tools/blob_dump.cc | 110 + .../rocksdb/tools/block_cache_analyzer/__init__.py | 2 + .../block_cache_analyzer/block_cache_pysim.py | 2000 ++ .../block_cache_analyzer/block_cache_pysim.sh | 156 + .../block_cache_analyzer/block_cache_pysim_test.py | 734 + .../block_cache_trace_analyzer.cc | 2308 ++ .../block_cache_trace_analyzer.h | 393 + .../block_cache_trace_analyzer_plot.py | 721 + .../block_cache_trace_analyzer_test.cc | 717 + .../block_cache_trace_analyzer_tool.cc | 25 + storage/rocksdb/rocksdb/tools/check_all_python.py | 22 + .../rocksdb/tools/check_format_compatible.sh | 191 + storage/rocksdb/rocksdb/tools/db_bench.cc | 21 + storage/rocksdb/rocksdb/tools/db_bench_tool.cc | 7048 ++++++ .../rocksdb/rocksdb/tools/db_bench_tool_test.cc | 320 + storage/rocksdb/rocksdb/tools/db_crashtest.py | 499 + storage/rocksdb/rocksdb/tools/db_repl_stress.cc | 159 + storage/rocksdb/rocksdb/tools/db_sanity_test.cc | 297 + storage/rocksdb/rocksdb/tools/dbench_monitor | 102 + storage/rocksdb/rocksdb/tools/dump/db_dump_tool.cc | 259 + storage/rocksdb/rocksdb/tools/dump/rocksdb_dump.cc | 63 + .../rocksdb/rocksdb/tools/dump/rocksdb_undump.cc | 62 + .../rocksdb/rocksdb/tools/generate_random_db.sh | 31 + .../rocksdb/rocksdb/tools/ingest_external_sst.sh | 18 + storage/rocksdb/rocksdb/tools/ldb.cc | 21 + storage/rocksdb/rocksdb/tools/ldb_cmd.cc | 3437 +++ storage/rocksdb/rocksdb/tools/ldb_cmd_impl.h | 628 + storage/rocksdb/rocksdb/tools/ldb_cmd_test.cc | 585 + storage/rocksdb/rocksdb/tools/ldb_test.py | 595 + storage/rocksdb/rocksdb/tools/ldb_tool.cc | 140 + storage/rocksdb/rocksdb/tools/pflag | 217 + storage/rocksdb/rocksdb/tools/rdb/.gitignore | 1 + storage/rocksdb/rocksdb/tools/rdb/API.md | 178 + storage/rocksdb/rocksdb/tools/rdb/README.md | 40 + storage/rocksdb/rocksdb/tools/rdb/binding.gyp | 25 + storage/rocksdb/rocksdb/tools/rdb/db_wrapper.cc | 526 + 
storage/rocksdb/rocksdb/tools/rdb/db_wrapper.h | 60 + storage/rocksdb/rocksdb/tools/rdb/rdb | 3 + storage/rocksdb/rocksdb/tools/rdb/rdb.cc | 16 + storage/rocksdb/rocksdb/tools/rdb/unit_test.js | 125 + .../rocksdb/rocksdb/tools/reduce_levels_test.cc | 220 + storage/rocksdb/rocksdb/tools/regression_test.sh | 470 + .../rocksdb/tools/report_lite_binary_size.sh | 42 + storage/rocksdb/rocksdb/tools/rocksdb_dump_test.sh | 9 + storage/rocksdb/rocksdb/tools/run_flash_bench.sh | 359 + storage/rocksdb/rocksdb/tools/run_leveldb.sh | 175 + storage/rocksdb/rocksdb/tools/sample-dump.dmp | Bin 0 -> 100 bytes storage/rocksdb/rocksdb/tools/sst_dump.cc | 21 + storage/rocksdb/rocksdb/tools/sst_dump_test.cc | 282 + storage/rocksdb/rocksdb/tools/sst_dump_tool.cc | 778 + storage/rocksdb/rocksdb/tools/sst_dump_tool_imp.h | 87 + storage/rocksdb/rocksdb/tools/trace_analyzer.cc | 25 + .../rocksdb/rocksdb/tools/trace_analyzer_test.cc | 727 + .../rocksdb/rocksdb/tools/trace_analyzer_tool.cc | 2001 ++ .../rocksdb/rocksdb/tools/trace_analyzer_tool.h | 292 + storage/rocksdb/rocksdb/tools/verify_random_db.sh | 39 + .../rocksdb/rocksdb/tools/write_external_sst.sh | 25 + storage/rocksdb/rocksdb/tools/write_stress.cc | 305 + .../rocksdb/rocksdb/tools/write_stress_runner.py | 74 + .../rocksdb/trace_replay/block_cache_tracer.cc | 497 + .../rocksdb/trace_replay/block_cache_tracer.h | 294 + .../trace_replay/block_cache_tracer_test.cc | 378 + .../rocksdb/rocksdb/trace_replay/trace_replay.cc | 485 + .../rocksdb/rocksdb/trace_replay/trace_replay.h | 189 + storage/rocksdb/rocksdb/util/aligned_buffer.h | 248 + storage/rocksdb/rocksdb/util/autovector.h | 361 + storage/rocksdb/rocksdb/util/autovector_test.cc | 330 + storage/rocksdb/rocksdb/util/bloom_impl.h | 483 + storage/rocksdb/rocksdb/util/bloom_test.cc | 912 + storage/rocksdb/rocksdb/util/build_version.cc.in | 5 + storage/rocksdb/rocksdb/util/build_version.h | 15 + storage/rocksdb/rocksdb/util/cast_util.h | 21 + storage/rocksdb/rocksdb/util/channel.h | 67 
+ storage/rocksdb/rocksdb/util/coding.cc | 89 + storage/rocksdb/rocksdb/util/coding.h | 480 + storage/rocksdb/rocksdb/util/coding_test.cc | 217 + .../rocksdb/util/compaction_job_stats_impl.cc | 91 + storage/rocksdb/rocksdb/util/comparator.cc | 216 + storage/rocksdb/rocksdb/util/compression.h | 1407 ++ .../rocksdb/util/compression_context_cache.cc | 108 + .../rocksdb/util/compression_context_cache.h | 47 + .../rocksdb/util/concurrent_task_limiter_impl.cc | 67 + .../rocksdb/util/concurrent_task_limiter_impl.h | 67 + storage/rocksdb/rocksdb/util/core_local.h | 83 + storage/rocksdb/rocksdb/util/crc32c.cc | 1255 ++ storage/rocksdb/rocksdb/util/crc32c.h | 51 + storage/rocksdb/rocksdb/util/crc32c_arm64.cc | 129 + storage/rocksdb/rocksdb/util/crc32c_arm64.h | 47 + storage/rocksdb/rocksdb/util/crc32c_ppc.c | 94 + storage/rocksdb/rocksdb/util/crc32c_ppc.h | 19 + storage/rocksdb/rocksdb/util/crc32c_ppc_asm.S | 752 + .../rocksdb/rocksdb/util/crc32c_ppc_constants.h | 900 + storage/rocksdb/rocksdb/util/crc32c_test.cc | 180 + storage/rocksdb/rocksdb/util/defer.h | 52 + storage/rocksdb/rocksdb/util/defer_test.cc | 39 + storage/rocksdb/rocksdb/util/duplicate_detector.h | 68 + storage/rocksdb/rocksdb/util/dynamic_bloom.cc | 70 + storage/rocksdb/rocksdb/util/dynamic_bloom.h | 214 + storage/rocksdb/rocksdb/util/dynamic_bloom_test.cc | 324 + .../rocksdb/rocksdb/util/file_checksum_helper.cc | 85 + .../rocksdb/rocksdb/util/file_checksum_helper.h | 117 + .../rocksdb/util/file_reader_writer_test.cc | 444 + storage/rocksdb/rocksdb/util/filelock_test.cc | 141 + storage/rocksdb/rocksdb/util/filter_bench.cc | 751 + storage/rocksdb/rocksdb/util/gflags_compat.h | 19 + storage/rocksdb/rocksdb/util/hash.cc | 83 + storage/rocksdb/rocksdb/util/hash.h | 120 + storage/rocksdb/rocksdb/util/hash_map.h | 67 + storage/rocksdb/rocksdb/util/hash_test.cc | 377 + storage/rocksdb/rocksdb/util/heap.h | 166 + storage/rocksdb/rocksdb/util/heap_test.cc | 139 + storage/rocksdb/rocksdb/util/kv_map.h | 33 + 
storage/rocksdb/rocksdb/util/log_write_bench.cc | 86 + storage/rocksdb/rocksdb/util/murmurhash.cc | 191 + storage/rocksdb/rocksdb/util/murmurhash.h | 42 + storage/rocksdb/rocksdb/util/mutexlock.h | 135 + storage/rocksdb/rocksdb/util/ppc-opcode.h | 27 + storage/rocksdb/rocksdb/util/random.cc | 38 + storage/rocksdb/rocksdb/util/random.h | 166 + storage/rocksdb/rocksdb/util/random_test.cc | 105 + storage/rocksdb/rocksdb/util/rate_limiter.cc | 339 + storage/rocksdb/rocksdb/util/rate_limiter.h | 113 + storage/rocksdb/rocksdb/util/rate_limiter_test.cc | 235 + storage/rocksdb/rocksdb/util/repeatable_thread.h | 149 + .../rocksdb/rocksdb/util/repeatable_thread_test.cc | 107 + storage/rocksdb/rocksdb/util/set_comparator.h | 22 + storage/rocksdb/rocksdb/util/slice.cc | 243 + storage/rocksdb/rocksdb/util/slice_test.cc | 163 + .../rocksdb/rocksdb/util/slice_transform_test.cc | 153 + storage/rocksdb/rocksdb/util/status.cc | 143 + storage/rocksdb/rocksdb/util/stderr_logger.h | 31 + storage/rocksdb/rocksdb/util/stop_watch.h | 118 + storage/rocksdb/rocksdb/util/string_util.cc | 409 + storage/rocksdb/rocksdb/util/string_util.h | 138 + storage/rocksdb/rocksdb/util/thread_list_test.cc | 352 + storage/rocksdb/rocksdb/util/thread_local.cc | 554 + storage/rocksdb/rocksdb/util/thread_local.h | 101 + storage/rocksdb/rocksdb/util/thread_local_test.cc | 580 + storage/rocksdb/rocksdb/util/thread_operation.h | 121 + storage/rocksdb/rocksdb/util/threadpool_imp.cc | 507 + storage/rocksdb/rocksdb/util/threadpool_imp.h | 112 + storage/rocksdb/rocksdb/util/timer_queue.h | 230 + storage/rocksdb/rocksdb/util/timer_queue_test.cc | 72 + .../rocksdb/rocksdb/util/user_comparator_wrapper.h | 65 + storage/rocksdb/rocksdb/util/util.h | 16 + storage/rocksdb/rocksdb/util/vector_iterator.h | 101 + storage/rocksdb/rocksdb/util/xxh3p.h | 1648 ++ storage/rocksdb/rocksdb/util/xxhash.cc | 1160 + storage/rocksdb/rocksdb/util/xxhash.h | 598 + .../rocksdb/utilities/backupable/backupable_db.cc | 1989 ++ 
.../utilities/backupable/backupable_db_test.cc | 1863 ++ .../utilities/blob_db/blob_compaction_filter.cc | 329 + .../utilities/blob_db/blob_compaction_filter.h | 168 + .../rocksdb/rocksdb/utilities/blob_db/blob_db.cc | 102 + .../rocksdb/rocksdb/utilities/blob_db/blob_db.h | 261 + .../rocksdb/utilities/blob_db/blob_db_gc_stats.h | 52 + .../rocksdb/utilities/blob_db/blob_db_impl.cc | 2116 ++ .../rocksdb/utilities/blob_db/blob_db_impl.h | 495 + .../utilities/blob_db/blob_db_impl_filesnapshot.cc | 109 + .../rocksdb/utilities/blob_db/blob_db_iterator.h | 147 + .../rocksdb/utilities/blob_db/blob_db_listener.h | 66 + .../rocksdb/utilities/blob_db/blob_db_test.cc | 1992 ++ .../rocksdb/utilities/blob_db/blob_dump_tool.cc | 278 + .../rocksdb/utilities/blob_db/blob_dump_tool.h | 57 + .../rocksdb/rocksdb/utilities/blob_db/blob_file.cc | 320 + .../rocksdb/rocksdb/utilities/blob_db/blob_file.h | 252 + .../rocksdb/utilities/blob_db/blob_log_format.cc | 149 + .../rocksdb/utilities/blob_db/blob_log_format.h | 133 + .../rocksdb/utilities/blob_db/blob_log_reader.cc | 105 + .../rocksdb/utilities/blob_db/blob_log_reader.h | 82 + .../rocksdb/utilities/blob_db/blob_log_writer.cc | 139 + .../rocksdb/utilities/blob_db/blob_log_writer.h | 94 + .../cassandra/cassandra_compaction_filter.cc | 47 + .../cassandra/cassandra_compaction_filter.h | 42 + .../utilities/cassandra/cassandra_format_test.cc | 367 + .../cassandra/cassandra_functional_test.cc | 311 + .../cassandra/cassandra_row_merge_test.cc | 112 + .../cassandra/cassandra_serialize_test.cc | 188 + .../rocksdb/rocksdb/utilities/cassandra/format.cc | 390 + .../rocksdb/rocksdb/utilities/cassandra/format.h | 197 + .../rocksdb/utilities/cassandra/merge_operator.cc | 67 + .../rocksdb/utilities/cassandra/merge_operator.h | 44 + .../rocksdb/utilities/cassandra/serialize.h | 75 + .../rocksdb/utilities/cassandra/test_utils.cc | 75 + .../rocksdb/utilities/cassandra/test_utils.h | 46 + .../utilities/checkpoint/checkpoint_impl.cc | 516 + 
.../rocksdb/utilities/checkpoint/checkpoint_impl.h | 79 + .../utilities/checkpoint/checkpoint_test.cc | 829 + .../remove_emptyvalue_compactionfilter.cc | 29 + .../remove_emptyvalue_compactionfilter.h | 27 + .../utilities/convenience/info_log_finder.cc | 25 + storage/rocksdb/rocksdb/utilities/debug.cc | 80 + storage/rocksdb/rocksdb/utilities/env_librados.cc | 1497 ++ storage/rocksdb/rocksdb/utilities/env_librados.md | 122 + .../rocksdb/rocksdb/utilities/env_librados_test.cc | 1146 + storage/rocksdb/rocksdb/utilities/env_mirror.cc | 262 + .../rocksdb/rocksdb/utilities/env_mirror_test.cc | 223 + storage/rocksdb/rocksdb/utilities/env_timed.cc | 145 + .../rocksdb/rocksdb/utilities/env_timed_test.cc | 44 + .../utilities/leveldb_options/leveldb_options.cc | 56 + .../rocksdb/utilities/memory/memory_test.cc | 278 + .../rocksdb/utilities/memory/memory_util.cc | 52 + .../rocksdb/rocksdb/utilities/merge_operators.h | 55 + .../rocksdb/utilities/merge_operators/bytesxor.cc | 59 + .../rocksdb/utilities/merge_operators/bytesxor.h | 39 + .../rocksdb/utilities/merge_operators/max.cc | 77 + .../rocksdb/utilities/merge_operators/put.cc | 83 + .../rocksdb/utilities/merge_operators/sortlist.cc | 100 + .../rocksdb/utilities/merge_operators/sortlist.h | 38 + .../merge_operators/string_append/stringappend.cc | 59 + .../merge_operators/string_append/stringappend.h | 31 + .../merge_operators/string_append/stringappend2.cc | 117 + .../merge_operators/string_append/stringappend2.h | 49 + .../string_append/stringappend_test.cc | 601 + .../rocksdb/utilities/merge_operators/uint64add.cc | 69 + .../rocksdb/rocksdb/utilities/object_registry.cc | 87 + .../rocksdb/utilities/object_registry_test.cc | 174 + .../option_change_migration.cc | 168 + .../option_change_migration_test.cc | 425 + .../rocksdb/utilities/options/options_util.cc | 114 + .../rocksdb/utilities/options/options_util_test.cc | 363 + .../utilities/persistent_cache/block_cache_tier.cc | 425 + 
.../utilities/persistent_cache/block_cache_tier.h | 156 + .../persistent_cache/block_cache_tier_file.cc | 608 + .../persistent_cache/block_cache_tier_file.h | 296 + .../block_cache_tier_file_buffer.h | 127 + .../persistent_cache/block_cache_tier_metadata.cc | 86 + .../persistent_cache/block_cache_tier_metadata.h | 125 + .../utilities/persistent_cache/hash_table.h | 238 + .../utilities/persistent_cache/hash_table_bench.cc | 308 + .../persistent_cache/hash_table_evictable.h | 168 + .../utilities/persistent_cache/hash_table_test.cc | 160 + .../rocksdb/utilities/persistent_cache/lrulist.h | 174 + .../persistent_cache/persistent_cache_bench.cc | 360 + .../persistent_cache/persistent_cache_test.cc | 474 + .../persistent_cache/persistent_cache_test.h | 285 + .../persistent_cache/persistent_cache_tier.cc | 163 + .../persistent_cache/persistent_cache_tier.h | 336 + .../persistent_cache/persistent_cache_util.h | 67 + .../persistent_cache/volatile_tier_impl.cc | 138 + .../persistent_cache/volatile_tier_impl.h | 142 + .../utilities/simulator_cache/cache_simulator.cc | 274 + .../utilities/simulator_cache/cache_simulator.h | 231 + .../simulator_cache/cache_simulator_test.cc | 494 + .../rocksdb/utilities/simulator_cache/sim_cache.cc | 354 + .../utilities/simulator_cache/sim_cache_test.cc | 225 + .../compact_on_deletion_collector.cc | 90 + .../compact_on_deletion_collector.h | 72 + .../compact_on_deletion_collector_test.cc | 178 + .../utilities/trace/file_trace_reader_writer.cc | 123 + .../utilities/trace/file_trace_reader_writer.h | 48 + .../transactions/optimistic_transaction.cc | 187 + .../transactions/optimistic_transaction.h | 101 + .../transactions/optimistic_transaction_db_impl.cc | 111 + .../transactions/optimistic_transaction_db_impl.h | 71 + .../transactions/optimistic_transaction_test.cc | 1535 ++ .../transactions/pessimistic_transaction.cc | 723 + .../transactions/pessimistic_transaction.h | 225 + .../transactions/pessimistic_transaction_db.cc | 632 + 
.../transactions/pessimistic_transaction_db.h | 220 + .../utilities/transactions/snapshot_checker.cc | 49 + .../utilities/transactions/transaction_base.cc | 837 + .../utilities/transactions/transaction_base.h | 374 + .../transactions/transaction_db_mutex_impl.cc | 135 + .../transactions/transaction_db_mutex_impl.h | 26 + .../utilities/transactions/transaction_lock_mgr.cc | 745 + .../utilities/transactions/transaction_lock_mgr.h | 158 + .../utilities/transactions/transaction_test.cc | 6224 ++++++ .../utilities/transactions/transaction_test.h | 517 + .../utilities/transactions/transaction_util.cc | 182 + .../utilities/transactions/transaction_util.h | 103 + .../write_prepared_transaction_test.cc | 3524 +++ .../utilities/transactions/write_prepared_txn.cc | 473 + .../utilities/transactions/write_prepared_txn.h | 119 + .../transactions/write_prepared_txn_db.cc | 998 + .../utilities/transactions/write_prepared_txn_db.h | 1111 + .../write_unprepared_transaction_test.cc | 727 + .../utilities/transactions/write_unprepared_txn.cc | 999 + .../utilities/transactions/write_unprepared_txn.h | 341 + .../transactions/write_unprepared_txn_db.cc | 468 + .../transactions/write_unprepared_txn_db.h | 148 + .../rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc | 335 + .../rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h | 361 + storage/rocksdb/rocksdb/utilities/ttl/ttl_test.cc | 693 + .../rocksdb/utilities/util_merge_operators_test.cc | 99 + .../write_batch_with_index.cc | 1065 + .../write_batch_with_index_internal.cc | 288 + .../write_batch_with_index_internal.h | 145 + .../write_batch_with_index_test.cc | 1846 ++ storage/rocksdb/tools/mysql_ldb.cc | 18 + storage/rocksdb/unittest/CMakeLists.txt | 22 + .../rocksdb/unittest/test_properties_collector.cc | 54 + storage/rocksdb/ut0counter.h | 203 + 2924 files changed, 665879 insertions(+) create mode 100644 storage/rocksdb/.clang-format create mode 100644 storage/rocksdb/.gitignore create mode 100644 storage/rocksdb/CMakeLists.txt create mode 100644 
storage/rocksdb/README create mode 100644 storage/rocksdb/atomic_stat.h create mode 100644 storage/rocksdb/build_rocksdb.cmake create mode 100644 storage/rocksdb/event_listener.cc create mode 100644 storage/rocksdb/event_listener.h create mode 100755 storage/rocksdb/get_rocksdb_files.sh create mode 100644 storage/rocksdb/ha_rocksdb.cc create mode 100644 storage/rocksdb/ha_rocksdb.h create mode 100644 storage/rocksdb/ha_rocksdb_proto.h create mode 100644 storage/rocksdb/logger.h create mode 100755 storage/rocksdb/myrocks_hotbackup.py create mode 100644 storage/rocksdb/mysql-test/rocksdb/combinations create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/autoinc_crash_safe.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/bypass_create_table.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority_module.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/deadlock_stats.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/dup_key_update.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/group_min_max.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/have_direct_io.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/have_rocksdb.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/have_rocksdb.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/have_rocksdb_default.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/have_rocksdb_replication.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/have_write_committed.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/have_write_prepared.inc create mode 100644 
storage/rocksdb/mysql-test/rocksdb/include/index_merge1.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/index_merge2.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/index_merge_2sweeps.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/index_merge_ror.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/index_merge_ror_cpk.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case1_1.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case1_2.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case2.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case3.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case4.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case5.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case6.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/locking_issues_case7.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/prefix_index_only_query_check.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/restart_mysqld_with_invalid_option.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/restart_mysqld_with_option.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/rocksdb_icp.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/simple_deadlock.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/start_mysqld_with_option.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/include/use_direct_io_option.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/my.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/1st.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/2pc_group_commit.result create mode 100644 
storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_cardinality.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_crash.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/add_index_inplace_sstfilewriter.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/add_unique_index_inplace.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/allow_no_pk_concurrent_insert.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/allow_no_primary_key_with_sk.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/allow_to_start_after_corruption.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/alter_table.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/analyze_table.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/apply_changes_iter.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/autoinc_crash_safe_partition.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/autoinc_debug.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/autoinc_secondary.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars_thread.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/autoinc_vars_thread_2.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/binlog_rotate_crash.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rc.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/blind_delete_rr.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/bloomfilter.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/bloomfilter2.result create 
mode 100644 storage/rocksdb/mysql-test/rocksdb/r/bloomfilter3.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/bloomfilter4.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/bloomfilter5.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/bloomfilter_bulk_load.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/bloomfilter_skip.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/bulk_load.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/bulk_load_drop_table.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/bulk_load_errors.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_cf_and_data.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/bulk_load_rev_data.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/bulk_load_sk.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_errors.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/bulk_load_unsorted_rev.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/bypass_select_basic_bloom.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/bytes_written.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/cardinality.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/check_flags.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/check_ignore_unknown_options.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/check_table.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/checkpoint.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/checksum_table.result create mode 100644 
storage/rocksdb/mysql-test/rocksdb/r/checksum_table_live.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/col_opt_default.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/col_opt_not_null.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/col_opt_null.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/col_opt_unsigned.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/col_opt_zerofill.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/collation.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/collation_exception.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/com_rpc_tx.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/commit_in_the_middle_ddl.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/compact_deletes.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/compression_zstd.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/concurrent_alter.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_read_committed.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_repeatable_read.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/cons_snapshot_serializable.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/corrupted_data_reads_debug.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/covered_unpack_info_format.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/create_no_primary_key_table.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/create_table.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/ddl_high_priority.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/deadlock.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/deadlock_stats.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/deadlock_tracking.result create mode 100644 
storage/rocksdb/mysql-test/rocksdb/r/delete.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/delete_ignore.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/delete_quick.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/delete_with_keys.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/describe.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/drop_database.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/drop_index_inplace.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/drop_table.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/drop_table2.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/drop_table3.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/dup_key_update.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/duplicate_table.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/explicit_snapshot.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/fail_system_cf.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/fast_prefix_index_fetch.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/force_shutdown.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/foreign_key.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/gap_lock_issue254.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/gap_lock_raise_error.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/get_error_message.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/group_min_max.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/ha_extra_keyread.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/handler_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/hermitage.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/i_s.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/i_s_ddl.result create mode 
100644 storage/rocksdb/mysql-test/rocksdb/r/i_s_deadlock.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/index.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/index_file_map.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/index_key_block_size.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/index_merge_rocksdb2.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/index_primary.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/index_type_btree.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/index_type_hash.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/information_schema.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/innodb_i_s_tables_disabled.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/insert.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/insert_optimized_config.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/insert_with_keys.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/issue100.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/issue100_delete.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/issue111.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/issue243_transactionStatus.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/issue255.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/issue290.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/issue314.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/issue495.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/issue884.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/issue896.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/issue900.result create mode 100644 
storage/rocksdb/mysql-test/rocksdb/r/iterator_bounds.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/kill.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/level_read_committed.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/level_read_uncommitted.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/level_repeatable_read.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/level_serializable.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/loaddata.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/lock.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/lock_info.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/lock_rows_not_exist.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/lock_wait_timeout_stats.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/locking_issues.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case1_1_rc.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case1_1_rr.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case1_2_rc.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case1_2_rr.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case2_rc.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case2_rc_lsr.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case2_rr.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case2_rr_lsr.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case3_rc.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case3_rr.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case4_rc.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case4_rr.result create mode 
100644 storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case5_rc.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case5_rr.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case6_rc.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case6_rr.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case7_rc.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case7_rc_lsr.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case7_rr.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/locking_issues_case7_rr_lsr.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/mariadb_ignore_dirs.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/mariadb_misc_binlog.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/mariadb_plugin.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_fixes.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/mariadb_port_rpl.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/max_open_files.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/misc.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/multi_varchar_sk_lookup.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/mysqlbinlog_blind_replace.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/mysqlbinlog_gtid_skip_empty_trans_rocksdb.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/mysqldump.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/mysqldump2.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/native_procedure.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/negative_stats.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/no_merge_sort.result create mode 100644 
storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_base.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/optimize_myrocks_replace_into_lock.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/optimize_table.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/optimizer_loose_index_scans.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/partition.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/percona_nonflushing_analyze_debug.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/perf_context.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/persistent_cache.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/prefix_extractor_override.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/read_only_tx.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/records_in_range.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/repair_table.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/replace.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_options.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_per_partition.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_cf_reverse.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_checksums.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_concurrent_delete.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_datadir.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_deadlock_detect_rc.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_deadlock_detect_rr.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_deadlock_stress_rc.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_deadlock_stress_rr.result 
create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_debug.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_icp.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_icp_rev.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_locks.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_parts.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_qcache.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_range2.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_read_free_rpl_stress.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_row_stats.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_table_stats_sampling_pct_change.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rocksdb_timeout_rollback.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rollback_savepoint.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rpl_row_not_found.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rpl_row_not_found_rc.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rpl_row_rocksdb.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rpl_row_stats.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rpl_row_triggers.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rpl_savepoint.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rpl_statement.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rpl_statement_not_found.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rqg_examples.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/rqg_runtime.result create mode 100644 
storage/rocksdb/mysql-test/rocksdb/r/rqg_transactions.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/secondary_key_update_lock.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/select.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/select_for_update.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/select_for_update_skip_locked_nowait.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/select_lock_in_share_mode.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/show_engine.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/show_table_status.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/shutdown.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/singledelete.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/skip_core_dump_on_error.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/skip_validate_tmp_table.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/slow_query_log.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/statistics.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/table_stats.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_ai.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_avg_row_length.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_checksum.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_connection.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_data_index_dir.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_delay_key_write.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_insert_method.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_key_block_size.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_max_rows.result create mode 100644 
storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_min_rows.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_pack_keys.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_password.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_row_format.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/tbl_opt_union.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/tbl_standard_opts.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/tmpdir.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/transaction.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/truncate_partition.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/truncate_table.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/truncate_table3.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/trx_info.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/trx_info_rpl.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/ttl_primary.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_read_filtering.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/ttl_primary_with_partitions.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/ttl_rows_examined.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_read_filtering_multiple_index.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/ttl_secondary_with_partitions.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_binary.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_binary_indexes.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_bit.result create mode 100644 
storage/rocksdb/mysql-test/rocksdb/r/type_bit_indexes.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_blob.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_blob_indexes.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_bool.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_char.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_char_indexes.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_char_indexes_collation.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_date_time.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_date_time_indexes.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_decimal.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_enum.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_enum_indexes.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_fixed.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_fixed_indexes.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_float.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_float_indexes.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_int.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_int_indexes.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_set.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_set_indexes.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_text.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_text_indexes.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_varbinary.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/type_varchar.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/unique_check.result create mode 100644 
storage/rocksdb/mysql-test/rocksdb/r/unique_sec.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/unique_sec_rev_cf.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/unsupported_tx_isolations.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/update.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/update_ignore.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/update_multi.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/update_with_keys.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/use_direct_io_for_flush_and_compaction.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/use_direct_reads_writes.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/validate_datadic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/varbinary_format.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/write_sync.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/r/xa.result create mode 100644 storage/rocksdb/mysql-test/rocksdb/slow_query_log.awk create mode 100644 storage/rocksdb/mysql-test/rocksdb/suite.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/suite.pm create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/1st.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/2pc_group_commit.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_cardinality-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_cardinality.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_crash.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/add_index_inplace_sstfilewriter.test create mode 100644 
storage/rocksdb/mysql-test/rocksdb/t/add_unique_index_inplace.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/allow_no_pk_concurrent_insert.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/allow_no_primary_key_with_sk.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/allow_to_start_after_corruption.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/alter_table.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/analyze_table.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/apply_changes_iter.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/autoinc_crash_safe_partition.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/autoinc_secondary.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars_thread.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/autoinc_vars_thread_2.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/binlog_rotate_crash.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rc.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rc.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/blind_delete_rr.test create mode 100644 
storage/rocksdb/mysql-test/rocksdb/t/blind_delete_without_tx_api.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bloomfilter-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bloomfilter.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bloomfilter.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bloomfilter2-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bloomfilter2.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bloomfilter3.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bloomfilter4-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bloomfilter4.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bloomfilter5.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_bulk_load-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_bulk_load.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_load_select.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_skip-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_skip.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bloomfilter_table_def.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bulk_load.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bulk_load_drop_table.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_cf_and_data.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bulk_load_rev_data.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bulk_load_sk.test create mode 100644 
storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_errors.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bulk_load_unsorted_rev.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bypass_select_basic_bloom.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/bytes_written.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/cardinality-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/cardinality.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/check_flags.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/check_ignore_unknown_options.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/check_log_for_xa.py create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/check_table.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/check_table.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/checkpoint.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/checksum_table.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/checksum_table_live.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/col_not_null.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/col_not_null_timestamp.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/col_null.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/col_opt_default.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/col_opt_not_null.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/col_opt_null.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/col_opt_unsigned.test create mode 100644 
storage/rocksdb/mysql-test/rocksdb/t/col_opt_zerofill.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/collation-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/collation.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/collation_exception-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/collation_exception.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/com_rpc_tx.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/com_rpc_tx.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/commit_in_the_middle_ddl.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/compact_deletes-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/compact_deletes.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/compact_deletes_test.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/compression_zstd.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/concurrent_alter.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_read_committed.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_read_committed.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_repeatable_read.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_repeatable_read.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_serializable.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/cons_snapshot_serializable.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/consistent_snapshot.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/corrupted_data_reads_debug.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/covered_unpack_info_format.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/create_no_primary_key_table-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/create_no_primary_key_table.test create mode 100644 
storage/rocksdb/mysql-test/rocksdb/t/create_table.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/ddl_high_priority.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/deadlock.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/deadlock_stats.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/deadlock_tracking.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/delete.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/delete_ignore.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/delete_quick.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/delete_with_keys.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/describe.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/disabled.def create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/drop_database.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/drop_index_inplace.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/drop_stats_procedure.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/drop_table-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/drop_table.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/drop_table2.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/drop_table3-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/drop_table3.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/drop_table3.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/drop_table3_repopulate_table.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/drop_table_repopulate_table.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/drop_table_sync.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/dup_key_update.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/duplicate_table.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/explicit_snapshot-master.opt create mode 100644 
storage/rocksdb/mysql-test/rocksdb/t/explicit_snapshot.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/fail_system_cf.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/fast_prefix_index_fetch.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/force_shutdown.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/foreign_key.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/gap_lock_issue254-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/gap_lock_issue254.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/gap_lock_raise_error.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/get_error_message.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/group_min_max-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/group_min_max.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/ha_extra_keyread.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/handler_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/hermitage.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/hermitage.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/hermitage_init.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/i_s.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/i_s_ddl.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/i_s_deadlock.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/index.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/index.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/index_file_map-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/index_file_map.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/index_key_block_size.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb.test create mode 100644 
storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/index_merge_rocksdb2.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/index_primary.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/index_type_btree.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/index_type_hash.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/information_schema-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/information_schema.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/init_stats_procedure.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/innodb_i_s_tables_disabled-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/innodb_i_s_tables_disabled.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/insert.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/insert_optimized_config.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/insert_with_keys.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/issue100.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/issue100_delete-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/issue100_delete.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/issue111.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/issue243_transactionStatus.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/issue255.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/issue290.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/issue314.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/issue495.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/issue884.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/issue896.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/issue900.test create mode 100644 
storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/iterator_bounds.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/kill.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/level_read_committed.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/level_read_uncommitted.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/level_read_uncommitted.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/level_repeatable_read.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/level_serializable.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/loaddata.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/loaddata.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/lock.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/lock_info.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/lock_rows_not_exist.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/lock_wait_timeout_stats.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/locking_issues.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case1_1_rc.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case1_1_rr.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case1_2_rc.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case1_2_rr.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case2_rc.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case2_rc_lsr.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case2_rr.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case2_rr_lsr.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case3_rc.test create mode 100644 
storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case3_rr.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case4_rc.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case4_rr.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case5_rc.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case5_rr.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case6_rc.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case6_rr.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case7_rc.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case7_rc_lsr.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case7_rr.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/locking_issues_case7_rr_lsr.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/mariadb_ignore_dirs.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/mariadb_misc_binlog-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/mariadb_misc_binlog.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/mariadb_plugin-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/mariadb_plugin.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/mariadb_port_fixes.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/mariadb_port_rpl.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/max_open_files.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/misc.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/multi_varchar_sk_lookup.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/mysqlbinlog_blind_replace.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/mysqlbinlog_gtid_skip_empty_trans_rocksdb-master.opt create mode 100644 
storage/rocksdb/mysql-test/rocksdb/t/mysqlbinlog_gtid_skip_empty_trans_rocksdb.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/mysqldump-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/mysqldump.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/mysqldump2-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/mysqldump2.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/native_procedure-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/native_procedure.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/negative_stats.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/no_merge_sort.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/no_primary_key_basic_ops.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_base.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/optimize_myrocks_replace_into_lock.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/optimize_table-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/optimize_table.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/optimize_table.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/optimizer_loose_index_scans.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/partition.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/percona_nonflushing_analyze_debug.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/perf_context.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/persistent_cache.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/prefix_extractor_override.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/read_only_tx-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/read_only_tx.test create mode 100644 
storage/rocksdb/mysql-test/rocksdb/t/records_in_range-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/records_in_range.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/repair_table.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/repair_table.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/replace.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_options-master.opt create mode 100755 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_options-master.sh create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_options.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_per_partition.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_reverse-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_cf_reverse.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_checksums-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_checksums.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_main.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_range.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_delete_sk.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_concurrent_insert.py create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_datadir.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_detect.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_detect_rc-master.opt create mode 100644 
storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_detect_rc.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_detect_rr.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress.py create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress_rc-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress_rc.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_deadlock_stress_rr.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_debug.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_icp-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_icp.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_icp_rev-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_icp_rev.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_locks-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_locks.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_parts-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_parts.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_qcache-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_qcache.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_range2.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.cnf create mode 100644 
storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_read_free_rpl_stress.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_row_stats.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_table_stats_sampling_pct_change.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rocksdb_timeout_rollback.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rollback_savepoint.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rpl_row_not_found_rc.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rpl_row_rocksdb.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rpl_row_rocksdb.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rpl_row_stats-slave.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rpl_row_stats.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rpl_row_stats.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rpl_row_triggers.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rpl_row_triggers.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rpl_savepoint.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rpl_savepoint.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rpl_statement.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rpl_statement.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rpl_statement_not_found.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rpl_statement_not_found.test 
create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rqg.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rqg_examples-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rqg_examples.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rqg_runtime.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rqg_transactions-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/rqg_transactions.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/se-innodb.out create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/secondary_key_update_lock.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/select.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/select_for_update.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/select_for_update_skip_locked_nowait.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/select_lock_in_share_mode.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/set_checkpoint.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/show_engine.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/show_table_status-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/show_table_status.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/shutdown-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/shutdown.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/singledelete-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/singledelete.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/skip_core_dump_on_error.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/skip_validate_tmp_table.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/slow_query_log-master.opt create mode 100644 
storage/rocksdb/mysql-test/rocksdb/t/slow_query_log.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/statistics-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/statistics.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/table_stats-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/table_stats.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_ai.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_avg_row_length.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_checksum.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_connection.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_data_index_dir.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_delay_key_write.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_insert_method.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_key_block_size.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_max_rows.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_min_rows.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_pack_keys.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_password.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_row_format.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/tbl_opt_union.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/tbl_standard_opts.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/tmpdir.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/transaction.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/transaction_isolation.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/transaction_select.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.inc create mode 100644 
storage/rocksdb/mysql-test/rocksdb/t/truncate_partition.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/truncate_table.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/truncate_table3-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/truncate_table3.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/trx_info.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/trx_info_rpl.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/trx_info_rpl.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/ttl_primary-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/ttl_primary.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_read_filtering.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_with_partitions-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/ttl_primary_with_partitions.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/ttl_rows_examined.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_read_filtering_multiple_index.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/ttl_secondary_with_partitions.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_binary.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_binary.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_binary_indexes-master.opt create mode 100644 
storage/rocksdb/mysql-test/rocksdb/t/type_binary_indexes.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_bit.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_bit.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_bit_indexes-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_bit_indexes.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_blob.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_blob.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_blob_indexes-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_blob_indexes.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_bool.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_bool.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_char.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_char.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_char_indexes-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_char_indexes.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_char_indexes_collation-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_char_indexes_collation.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_date_time.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_date_time.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_date_time_indexes-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_date_time_indexes.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_decimal-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_decimal.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_enum.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_enum.test create mode 100644 
storage/rocksdb/mysql-test/rocksdb/t/type_enum_indexes-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_enum_indexes.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_fixed.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_fixed.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_fixed_indexes-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_fixed_indexes.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_float.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_float.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_float_indexes-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_float_indexes.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_int.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_int.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_int_indexes-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_int_indexes.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_set.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_set.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_set_indexes-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_set_indexes.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_text.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_text.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_text_indexes-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_text_indexes.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_varbinary.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_varbinary.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_varchar-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_varchar.inc create mode 100644 
storage/rocksdb/mysql-test/rocksdb/t/type_varchar.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/type_varchar_endspace.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/unique_check.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/unique_sec.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/unique_sec.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/unique_sec_rev_cf.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/unsupported_tx_isolations.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/update.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/update_ignore-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/update_ignore.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/update_multi.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/update_multi_exec.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/update_with_keys.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/use_direct_io_for_flush_and_compaction.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/use_direct_reads_writes.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/validate_datadic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/varbinary_format.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/write_sync.test create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/xa-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb/t/xa.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_hotbackup/base.cnf create mode 100755 storage/rocksdb/mysql-test/rocksdb_hotbackup/include/clean_tmpfiles.sh create mode 100644 storage/rocksdb/mysql-test/rocksdb_hotbackup/include/cleanup.inc create mode 100755 storage/rocksdb/mysql-test/rocksdb_hotbackup/include/create_slocket_socket.sh create mode 100755 
storage/rocksdb/mysql-test/rocksdb_hotbackup/include/create_table.sh create mode 100755 storage/rocksdb/mysql-test/rocksdb_hotbackup/include/load_data.sh create mode 100755 storage/rocksdb/mysql-test/rocksdb_hotbackup/include/load_data_and_run.sh create mode 100755 storage/rocksdb/mysql-test/rocksdb_hotbackup/include/load_data_slocket.sh create mode 100755 storage/rocksdb/mysql-test/rocksdb_hotbackup/include/remove_slocket_socket.sh create mode 100644 storage/rocksdb/mysql-test/rocksdb_hotbackup/include/setup.inc create mode 100755 storage/rocksdb/mysql-test/rocksdb_hotbackup/include/setup_replication_gtid.sh create mode 100644 storage/rocksdb/mysql-test/rocksdb_hotbackup/include/setup_replication_gtid_and_sync.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb_hotbackup/include/setup_slocket.inc create mode 100755 storage/rocksdb/mysql-test/rocksdb_hotbackup/include/stream_run.sh create mode 100644 storage/rocksdb/mysql-test/rocksdb_hotbackup/my.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb_hotbackup/r/gtid.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_hotbackup/r/slocket.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_hotbackup/r/stream.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_hotbackup/r/wdt.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream_direct.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_hotbackup/r/xbstream_socket.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_hotbackup/t/gtid-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_hotbackup/t/gtid-slave.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_hotbackup/t/gtid.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_hotbackup/t/slocket.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_hotbackup/t/stream.test create mode 100644 
storage/rocksdb/mysql-test/rocksdb_hotbackup/t/wdt.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_direct.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_hotbackup/t/xbstream_socket.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/combinations create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/include/have_rocksdb.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/include/have_rocksdb.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_gtid_crash_safe.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/include/rpl_no_unique_check_on_lag.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/my.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/consistent_snapshot_mixed_engines.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/mdev12179.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/multiclient_2pc.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/optimize_myrocks_replace_into.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/rocksdb_slave_check_before_image_consistency.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_binlog_xid_count.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_crash_safe_wal_corrupt.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_ddl_high_priority.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_crash_safe.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_crash_safe_optimized.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_crash_safe_wal_corrupt.result 
create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_gtid_rocksdb_sys_header.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_missing_columns_sk_update.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_mts_dependency_unique_key_conflicts.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_no_unique_check_on_lag.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_no_unique_check_on_lag_mts.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_2pc_crash_recover.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_slave_gtid_info_optimized.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_snapshot.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_snapshot_without_gtid.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_rocksdb_stress_crash.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_skip_trx_api_binlog_format.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/rpl_xa.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_recovery.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/r/singledelete_idempotent_table.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/rpl_1slave_base.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/suite.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/suite.pm create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/consistent_snapshot_mixed_engines-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/consistent_snapshot_mixed_engines.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/disabled.def create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/mdev12179.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc-master.opt 
create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/multiclient_2pc.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/optimize_myrocks_replace_into.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency-slave.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rocksdb_slave_check_before_image_consistency.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_binlog_xid_count-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_binlog_xid_count.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_check_for_binlog_info.pl create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_crash_safe_wal_corrupt.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_crash_safe_wal_corrupt.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_ddl_high_priority.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe-slave.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized-slave.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_optimized.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_crash_safe_wal_corrupt.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_rocksdb_sys_header-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_rocksdb_sys_header-slave.opt create mode 
100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_gtid_rocksdb_sys_header.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_missing_columns_sk_update.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_missing_columns_sk_update.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_mts_dependency_unique_key_conflicts.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag-slave.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag_mts-slave.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_no_unique_check_on_lag_mts.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_2pc_crash_recover-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_2pc_crash_recover-slave.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_2pc_crash_recover.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized-slave.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_slave_gtid_info_optimized.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot-slave.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_snapshot_without_gtid.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_stress_crash-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_stress_crash-slave.opt create mode 100644 
storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_rocksdb_stress_crash.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format-slave.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_skip_trx_api_binlog_format.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_xa.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/rpl_xa.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_recovery.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_table.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb_rpl/t/singledelete_idempotent_table.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_stress/combinations create mode 100644 storage/rocksdb/mysql-test/rocksdb_stress/include/have_rocksdb.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb_stress/include/have_rocksdb.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_stress/include/rocksdb_stress.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb_stress/my.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_stress/r/rocksdb_stress_crash.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_stress/suite.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_stress/suite.pm create mode 100644 storage/rocksdb/mysql-test/rocksdb_stress/t/disabled.def create mode 100644 storage/rocksdb/mysql-test/rocksdb_stress/t/load_generator.py create mode 100644 storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_stress/t/rocksdb_stress_crash.test create mode 100644 
storage/rocksdb/mysql-test/rocksdb_sys_vars/include/correctboolvalue.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/include/have_rocksdb.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/include/have_rocksdb.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/include/rocksdb_sys_var.inc create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/my.cnf create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/all_vars.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_access_hint_on_compaction_start_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_advise_random_on_open_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_concurrent_memtable_write_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_mmap_reads_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_mmap_writes_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_allow_to_start_after_corruption_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_blind_delete_primary_key_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_block_cache_size_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_block_restart_interval_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_block_size_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_block_size_deviation_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_allow_sk_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_allow_unsorted_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_basic.result 
create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bulk_load_size_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_bytes_per_sync_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_dump_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_high_pri_pool_ratio_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_index_and_filter_blocks_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_cache_index_and_filter_with_high_priority_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_checksums_pct_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_collect_sst_properties_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_commit_in_the_middle_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_commit_time_batch_for_recovery_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compact_cf_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_readahead_size_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_sequential_deletes_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_sequential_deletes_count_sd_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_sequential_deletes_file_size_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_compaction_sequential_deletes_window_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_create_checkpoint_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_create_if_missing_basic.result 
create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_create_missing_column_families_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_datadir_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_db_write_buffer_size_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_deadlock_detect_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_deadlock_detect_depth_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_manual_compaction_delay_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_optimizer_no_zero_cardinality_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_ignore_pk_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_read_filter_ts_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_rec_ts_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_debug_ttl_snapshot_ts_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_default_cf_options_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delayed_write_rate_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delete_cf_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_delete_obsolete_files_period_micros_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_2pc_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_bulk_load_api_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_insert_with_update_caching_basic.result create mode 100644 
storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_thread_tracking_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_ttl_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_ttl_read_filtering_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_enable_write_thread_adaptive_yield_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_error_if_exists_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_error_on_suboptimal_collation_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_flush_log_at_trx_commit_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_compute_memtable_stats_cachetime_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_flush_memtable_and_lzero_now_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_flush_memtable_now_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_force_index_records_in_range_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_git_hash_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_hash_index_allow_collision_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_ignore_unknown_options_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_index_type_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_info_log_level_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_io_write_timeout_basic.result create mode 100644 
storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_is_fd_close_on_exec_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_keep_log_file_num_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_large_prefix_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_lock_scanned_rows_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_lock_wait_timeout_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_log_file_time_to_roll_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manifest_preallocation_size_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manual_compaction_threads_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_manual_wal_flush_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_master_skip_tx_api_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_background_jobs_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_latest_deadlocks_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_log_file_size_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_manifest_file_size_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_manual_compactions_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_open_files_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_row_locks_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_subcompactions_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_max_total_wal_size_basic.result create mode 
100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_buf_size_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_combine_read_size_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_merge_tmp_file_removal_delay_ms_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_new_table_reader_for_compaction_inputs_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_no_block_cache_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_override_cf_options_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_paranoid_checks_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_pause_background_work_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_perf_context_level_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_persistent_cache_path_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_persistent_cache_size_mb_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_pin_l0_filter_and_index_blocks_in_cache_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_print_snapshot_conflict_queries_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_rate_limiter_bytes_per_sec_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_read_free_rpl_tables_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_records_in_range_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_remove_mariabackup_checkpoint_basic.result create mode 100644 
storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_reset_stats_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_rollback_on_timeout_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_seconds_between_stat_computes_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_signal_drop_index_thread_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sim_cache_size_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_bloom_filter_on_read_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_fill_cache_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_skip_unique_check_tables_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_sst_mgr_rate_bytes_per_sec_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_dump_period_sec_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_level_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_stats_recalc_rate_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_store_row_debug_checksums_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_strict_collation_check_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_strict_collation_exceptions_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_supported_compression_types_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_table_cache_numshardbits_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_table_stats_sampling_pct_basic.result create mode 100644 
storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_tmpdir_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_trace_sst_api_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_two_write_queues_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_unsafe_for_binlog_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_update_cf_options_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_adaptive_mutex_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_clock_cache_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_direct_io_for_flush_and_compaction_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_direct_reads_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_use_fsync_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_validate_tables_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_verify_row_debug_checksums_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_bytes_per_sync_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_dir_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_recovery_mode_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_size_limit_mb_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_wal_ttl_seconds_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_whole_key_filtering_basic.result create mode 100644 
storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_batch_max_bytes_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_disable_wal_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_ignore_missing_column_families_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/r/rocksdb_write_policy_basic.result create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/suite.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/suite.pm create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/all_vars.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/disabled.def create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_access_hint_on_compaction_start_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_advise_random_on_open_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_concurrent_memtable_write_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_mmap_reads_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_mmap_writes_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_allow_to_start_after_corruption_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_blind_delete_primary_key_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_block_cache_size_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_block_restart_interval_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_block_size_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_block_size_deviation_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_allow_sk_basic.test 
create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_allow_unsorted_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bulk_load_size_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_bytes_per_sync_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_dump_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_high_pri_pool_ratio_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_index_and_filter_blocks_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_cache_index_and_filter_with_high_priority_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_checksums_pct_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_collect_sst_properties_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_commit_in_the_middle_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_commit_time_batch_for_recovery_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compact_cf_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_readahead_size_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_sequential_deletes_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_sequential_deletes_count_sd_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_sequential_deletes_file_size_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_compaction_sequential_deletes_window_basic.test create mode 100644 
storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_create_checkpoint_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_create_if_missing_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_create_missing_column_families_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_datadir_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_db_write_buffer_size_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_deadlock_detect_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_deadlock_detect_depth_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_manual_compaction_delay_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_optimizer_no_zero_cardinality_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_ignore_pk_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_read_filter_ts_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_rec_ts_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_debug_ttl_snapshot_ts_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_default_cf_options_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delayed_write_rate_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic-master.opt create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_cf_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_delete_obsolete_files_period_micros_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_2pc_basic.test create mode 100644 
storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_bulk_load_api_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_insert_with_update_caching_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_thread_tracking_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_ttl_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_ttl_read_filtering_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_enable_write_thread_adaptive_yield_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_error_if_exists_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_error_on_suboptimal_collation_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_flush_log_at_trx_commit_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_compute_memtable_stats_cachetime_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_flush_memtable_and_lzero_now_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_flush_memtable_now_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_force_index_records_in_range_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_git_hash_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_hash_index_allow_collision_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_ignore_unknown_options_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_index_type_basic.test create mode 100644 
storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_info_log_level_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_io_write_timeout_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_is_fd_close_on_exec_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_keep_log_file_num_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_large_prefix_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_lock_scanned_rows_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_lock_wait_timeout_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_log_file_time_to_roll_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manifest_preallocation_size_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manual_compaction_threads_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_manual_wal_flush_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_master_skip_tx_api_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_background_jobs_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_latest_deadlocks_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_log_file_size_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_manifest_file_size_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_manual_compactions_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_open_files_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_row_locks_basic.test create mode 100644 
storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_subcompactions_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_max_total_wal_size_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_buf_size_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_combine_read_size_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_merge_tmp_file_removal_delay_ms_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_new_table_reader_for_compaction_inputs_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_no_block_cache_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_override_cf_options_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_paranoid_checks_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_pause_background_work_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_perf_context_level_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_persistent_cache_path_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_persistent_cache_size_mb_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_pin_l0_filter_and_index_blocks_in_cache_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_print_snapshot_conflict_queries_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_rate_limiter_bytes_per_sec_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_read_free_rpl_tables_basic.test create mode 100644 
storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_records_in_range_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_remove_mariabackup_checkpoint_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_reset_stats_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_rollback_on_timeout_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_seconds_between_stat_computes_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_signal_drop_index_thread_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sim_cache_size_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_bloom_filter_on_read_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_fill_cache_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_skip_unique_check_tables_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_sst_mgr_rate_bytes_per_sec_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_stats_dump_period_sec_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_stats_level_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_stats_recalc_rate_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_store_row_debug_checksums_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_strict_collation_check_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_strict_collation_exceptions_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_supported_compression_types_basic.test create mode 100644 
storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_table_cache_numshardbits_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_table_stats_sampling_pct_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_tmpdir_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_trace_sst_api_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_two_write_queues_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_unsafe_for_binlog_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_update_cf_options_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_adaptive_mutex_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_clock_cache_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_direct_io_for_flush_and_compaction_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_direct_reads_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_use_fsync_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_validate_tables_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_verify_row_debug_checksums_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_bytes_per_sync_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_dir_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_recovery_mode_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_size_limit_mb_basic.test create mode 100644 
storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_wal_ttl_seconds_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_whole_key_filtering_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_batch_max_bytes_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_disable_wal_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_ignore_missing_column_families_basic.test create mode 100644 storage/rocksdb/mysql-test/rocksdb_sys_vars/t/rocksdb_write_policy_basic.test create mode 100644 storage/rocksdb/mysql-test/storage_engine/cache_index.rdiff create mode 100644 storage/rocksdb/mysql-test/storage_engine/checksum_table_live.rdiff create mode 100644 storage/rocksdb/mysql-test/storage_engine/cleanup_engine.inc create mode 100644 storage/rocksdb/mysql-test/storage_engine/define_engine.inc create mode 100644 storage/rocksdb/mysql-test/storage_engine/disabled.def create mode 100644 storage/rocksdb/mysql-test/storage_engine/index.rdiff create mode 100644 storage/rocksdb/mysql-test/storage_engine/index_type_btree.rdiff create mode 100644 storage/rocksdb/mysql-test/storage_engine/index_type_hash.rdiff create mode 100644 storage/rocksdb/mysql-test/storage_engine/mask_engine.inc create mode 100644 storage/rocksdb/mysql-test/storage_engine/misc.rdiff create mode 100644 storage/rocksdb/mysql-test/storage_engine/parts/checksum_table.rdiff create mode 100644 storage/rocksdb/mysql-test/storage_engine/parts/create_table.rdiff create mode 100644 storage/rocksdb/mysql-test/storage_engine/parts/disabled.def create mode 100644 storage/rocksdb/mysql-test/storage_engine/parts/suite.opt create mode 100644 storage/rocksdb/mysql-test/storage_engine/show_engine.rdiff create mode 100644 storage/rocksdb/mysql-test/storage_engine/show_table_status.rdiff create mode 100644 storage/rocksdb/mysql-test/storage_engine/suite.opt create mode 100644 
storage/rocksdb/mysql-test/storage_engine/tbl_opt_insert_method.rdiff create mode 100644 storage/rocksdb/mysql-test/storage_engine/tbl_opt_union.rdiff create mode 100644 storage/rocksdb/mysql-test/storage_engine/tbl_temporary.rdiff create mode 100644 storage/rocksdb/mysql-test/storage_engine/truncate_table.rdiff create mode 100644 storage/rocksdb/mysql-test/storage_engine/trx/delete.rdiff create mode 100644 storage/rocksdb/mysql-test/storage_engine/trx/disabled.def create mode 100644 storage/rocksdb/mysql-test/storage_engine/trx/insert.rdiff create mode 100644 storage/rocksdb/mysql-test/storage_engine/trx/level_read_committed.rdiff create mode 100644 storage/rocksdb/mysql-test/storage_engine/trx/level_repeatable_read.rdiff create mode 100644 storage/rocksdb/mysql-test/storage_engine/trx/suite.opt create mode 100644 storage/rocksdb/mysql-test/storage_engine/trx/update.rdiff create mode 100644 storage/rocksdb/mysql-test/storage_engine/type_binary_indexes.rdiff create mode 100644 storage/rocksdb/mysql-test/storage_engine/type_bit_indexes.rdiff create mode 100644 storage/rocksdb/mysql-test/storage_engine/type_enum_indexes.rdiff create mode 100644 storage/rocksdb/mysql-test/storage_engine/type_set_indexes.rdiff create mode 100644 storage/rocksdb/nosql_access.cc create mode 100644 storage/rocksdb/nosql_access.h create mode 100644 storage/rocksdb/properties_collector.cc create mode 100644 storage/rocksdb/properties_collector.h create mode 100644 storage/rocksdb/rdb_buff.h create mode 100644 storage/rocksdb/rdb_cf_manager.cc create mode 100644 storage/rocksdb/rdb_cf_manager.h create mode 100644 storage/rocksdb/rdb_cf_options.cc create mode 100644 storage/rocksdb/rdb_cf_options.h create mode 100644 storage/rocksdb/rdb_compact_filter.h create mode 100644 storage/rocksdb/rdb_comparator.h create mode 100644 storage/rocksdb/rdb_converter.cc create mode 100644 storage/rocksdb/rdb_converter.h create mode 100644 storage/rocksdb/rdb_datadic.cc create mode 100644 
storage/rocksdb/rdb_datadic.h create mode 100644 storage/rocksdb/rdb_global.h create mode 100644 storage/rocksdb/rdb_i_s.cc create mode 100644 storage/rocksdb/rdb_i_s.h create mode 100644 storage/rocksdb/rdb_index_merge.cc create mode 100644 storage/rocksdb/rdb_index_merge.h create mode 100644 storage/rocksdb/rdb_io_watchdog.cc create mode 100644 storage/rocksdb/rdb_io_watchdog.h create mode 100644 storage/rocksdb/rdb_mariadb_port.h create mode 100644 storage/rocksdb/rdb_mariadb_server_port.cc create mode 100644 storage/rocksdb/rdb_mariadb_server_port.h create mode 100644 storage/rocksdb/rdb_mutex_wrapper.cc create mode 100644 storage/rocksdb/rdb_mutex_wrapper.h create mode 100644 storage/rocksdb/rdb_perf_context.cc create mode 100644 storage/rocksdb/rdb_perf_context.h create mode 100644 storage/rocksdb/rdb_psi.cc create mode 100644 storage/rocksdb/rdb_psi.h create mode 100644 storage/rocksdb/rdb_source_revision.h create mode 100644 storage/rocksdb/rdb_source_revision.h.in create mode 100644 storage/rocksdb/rdb_sst_info.cc create mode 100644 storage/rocksdb/rdb_sst_info.h create mode 100644 storage/rocksdb/rdb_threads.cc create mode 100644 storage/rocksdb/rdb_threads.h create mode 100644 storage/rocksdb/rdb_utils.cc create mode 100644 storage/rocksdb/rdb_utils.h create mode 100644 storage/rocksdb/rocksdb-range-access.txt create mode 100644 storage/rocksdb/rocksdb/.clang-format create mode 100644 storage/rocksdb/rocksdb/.gitignore create mode 100644 storage/rocksdb/rocksdb/.lgtm.yml create mode 100644 storage/rocksdb/rocksdb/.travis.yml create mode 100644 storage/rocksdb/rocksdb/.watchmanconfig create mode 100644 storage/rocksdb/rocksdb/AUTHORS create mode 100644 storage/rocksdb/rocksdb/CMakeLists.txt create mode 100644 storage/rocksdb/rocksdb/CODE_OF_CONDUCT.md create mode 100644 storage/rocksdb/rocksdb/CONTRIBUTING.md create mode 100644 storage/rocksdb/rocksdb/COPYING create mode 100644 storage/rocksdb/rocksdb/DEFAULT_OPTIONS_HISTORY.md create mode 100644 
storage/rocksdb/rocksdb/DUMP_FORMAT.md create mode 100644 storage/rocksdb/rocksdb/HISTORY.md create mode 100644 storage/rocksdb/rocksdb/INSTALL.md create mode 100644 storage/rocksdb/rocksdb/LANGUAGE-BINDINGS.md create mode 100644 storage/rocksdb/rocksdb/LICENSE.Apache create mode 100644 storage/rocksdb/rocksdb/LICENSE.leveldb create mode 100644 storage/rocksdb/rocksdb/Makefile create mode 100644 storage/rocksdb/rocksdb/README.md create mode 100644 storage/rocksdb/rocksdb/ROCKSDB_LITE.md create mode 100644 storage/rocksdb/rocksdb/TARGETS create mode 100644 storage/rocksdb/rocksdb/USERS.md create mode 100644 storage/rocksdb/rocksdb/Vagrantfile create mode 100644 storage/rocksdb/rocksdb/WINDOWS_PORT.md create mode 100644 storage/rocksdb/rocksdb/appveyor.yml create mode 100644 storage/rocksdb/rocksdb/buckifier/buckify_rocksdb.py create mode 100755 storage/rocksdb/rocksdb/buckifier/rocks_test_runner.sh create mode 100644 storage/rocksdb/rocksdb/buckifier/targets_builder.py create mode 100644 storage/rocksdb/rocksdb/buckifier/targets_cfg.py create mode 100644 storage/rocksdb/rocksdb/buckifier/util.py create mode 100755 storage/rocksdb/rocksdb/build_tools/amalgamate.py create mode 100755 storage/rocksdb/rocksdb/build_tools/build_detect_platform create mode 100644 storage/rocksdb/rocksdb/build_tools/dependencies.sh create mode 100644 storage/rocksdb/rocksdb/build_tools/dependencies_4.8.1.sh create mode 100644 storage/rocksdb/rocksdb/build_tools/dependencies_platform007.sh create mode 100755 storage/rocksdb/rocksdb/build_tools/dockerbuild.sh create mode 100644 storage/rocksdb/rocksdb/build_tools/error_filter.py create mode 100755 storage/rocksdb/rocksdb/build_tools/fb_compile_mongo.sh create mode 100644 storage/rocksdb/rocksdb/build_tools/fbcode_config.sh create mode 100644 storage/rocksdb/rocksdb/build_tools/fbcode_config4.8.1.sh create mode 100644 storage/rocksdb/rocksdb/build_tools/fbcode_config_platform007.sh create mode 100755 
storage/rocksdb/rocksdb/build_tools/format-diff.sh create mode 100755 storage/rocksdb/rocksdb/build_tools/gnu_parallel create mode 100755 storage/rocksdb/rocksdb/build_tools/make_package.sh create mode 100755 storage/rocksdb/rocksdb/build_tools/precommit_checker.py create mode 100755 storage/rocksdb/rocksdb/build_tools/regression_build_test.sh create mode 100755 storage/rocksdb/rocksdb/build_tools/rocksdb-lego-determinator create mode 100644 storage/rocksdb/rocksdb/build_tools/run_ci_db_test.ps1 create mode 100755 storage/rocksdb/rocksdb/build_tools/setup_centos7.sh create mode 100755 storage/rocksdb/rocksdb/build_tools/update_dependencies.sh create mode 100755 storage/rocksdb/rocksdb/build_tools/version.sh create mode 100644 storage/rocksdb/rocksdb/cache/cache_bench.cc create mode 100644 storage/rocksdb/rocksdb/cache/cache_test.cc create mode 100644 storage/rocksdb/rocksdb/cache/clock_cache.cc create mode 100644 storage/rocksdb/rocksdb/cache/clock_cache.h create mode 100644 storage/rocksdb/rocksdb/cache/lru_cache.cc create mode 100644 storage/rocksdb/rocksdb/cache/lru_cache.h create mode 100644 storage/rocksdb/rocksdb/cache/lru_cache_test.cc create mode 100644 storage/rocksdb/rocksdb/cache/sharded_cache.cc create mode 100644 storage/rocksdb/rocksdb/cache/sharded_cache.h create mode 100644 storage/rocksdb/rocksdb/cmake/RocksDBConfig.cmake.in create mode 100644 storage/rocksdb/rocksdb/cmake/modules/FindJeMalloc.cmake create mode 100644 storage/rocksdb/rocksdb/cmake/modules/FindNUMA.cmake create mode 100644 storage/rocksdb/rocksdb/cmake/modules/FindTBB.cmake create mode 100644 storage/rocksdb/rocksdb/cmake/modules/Findgflags.cmake create mode 100644 storage/rocksdb/rocksdb/cmake/modules/Findlz4.cmake create mode 100644 storage/rocksdb/rocksdb/cmake/modules/Findsnappy.cmake create mode 100644 storage/rocksdb/rocksdb/cmake/modules/Findzstd.cmake create mode 100644 storage/rocksdb/rocksdb/cmake/modules/ReadVersion.cmake create mode 100755 
storage/rocksdb/rocksdb/coverage/coverage_test.sh create mode 100644 storage/rocksdb/rocksdb/coverage/parse_gcov_output.py create mode 100644 storage/rocksdb/rocksdb/db/arena_wrapped_db_iter.cc create mode 100644 storage/rocksdb/rocksdb/db/arena_wrapped_db_iter.h create mode 100644 storage/rocksdb/rocksdb/db/blob_index.h create mode 100644 storage/rocksdb/rocksdb/db/builder.cc create mode 100644 storage/rocksdb/rocksdb/db/builder.h create mode 100644 storage/rocksdb/rocksdb/db/c.cc create mode 100644 storage/rocksdb/rocksdb/db/c_test.c create mode 100644 storage/rocksdb/rocksdb/db/column_family.cc create mode 100644 storage/rocksdb/rocksdb/db/column_family.h create mode 100644 storage/rocksdb/rocksdb/db/column_family_test.cc create mode 100644 storage/rocksdb/rocksdb/db/compact_files_test.cc create mode 100644 storage/rocksdb/rocksdb/db/compacted_db_impl.cc create mode 100644 storage/rocksdb/rocksdb/db/compacted_db_impl.h create mode 100644 storage/rocksdb/rocksdb/db/compaction/compaction.cc create mode 100644 storage/rocksdb/rocksdb/db/compaction/compaction.h create mode 100644 storage/rocksdb/rocksdb/db/compaction/compaction_iteration_stats.h create mode 100644 storage/rocksdb/rocksdb/db/compaction/compaction_iterator.cc create mode 100644 storage/rocksdb/rocksdb/db/compaction/compaction_iterator.h create mode 100644 storage/rocksdb/rocksdb/db/compaction/compaction_iterator_test.cc create mode 100644 storage/rocksdb/rocksdb/db/compaction/compaction_job.cc create mode 100644 storage/rocksdb/rocksdb/db/compaction/compaction_job.h create mode 100644 storage/rocksdb/rocksdb/db/compaction/compaction_job_stats_test.cc create mode 100644 storage/rocksdb/rocksdb/db/compaction/compaction_job_test.cc create mode 100644 storage/rocksdb/rocksdb/db/compaction/compaction_picker.cc create mode 100644 storage/rocksdb/rocksdb/db/compaction/compaction_picker.h create mode 100644 storage/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.cc create mode 100644 
storage/rocksdb/rocksdb/db/compaction/compaction_picker_fifo.h create mode 100644 storage/rocksdb/rocksdb/db/compaction/compaction_picker_level.cc create mode 100644 storage/rocksdb/rocksdb/db/compaction/compaction_picker_level.h create mode 100644 storage/rocksdb/rocksdb/db/compaction/compaction_picker_test.cc create mode 100644 storage/rocksdb/rocksdb/db/compaction/compaction_picker_universal.cc create mode 100644 storage/rocksdb/rocksdb/db/compaction/compaction_picker_universal.h create mode 100644 storage/rocksdb/rocksdb/db/comparator_db_test.cc create mode 100644 storage/rocksdb/rocksdb/db/convenience.cc create mode 100644 storage/rocksdb/rocksdb/db/corruption_test.cc create mode 100644 storage/rocksdb/rocksdb/db/cuckoo_table_db_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_basic_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_blob_index_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_block_cache_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_bloom_filter_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_compaction_filter_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_compaction_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_dynamic_level_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_encryption_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_filesnapshot.cc create mode 100644 storage/rocksdb/rocksdb/db/db_flush_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_impl/db_impl.cc create mode 100644 storage/rocksdb/rocksdb/db/db_impl/db_impl.h create mode 100644 storage/rocksdb/rocksdb/db/db_impl/db_impl_compaction_flush.cc create mode 100644 storage/rocksdb/rocksdb/db/db_impl/db_impl_debug.cc create mode 100644 storage/rocksdb/rocksdb/db/db_impl/db_impl_experimental.cc create mode 100644 storage/rocksdb/rocksdb/db/db_impl/db_impl_files.cc create mode 100644 storage/rocksdb/rocksdb/db/db_impl/db_impl_open.cc create mode 100644 
storage/rocksdb/rocksdb/db/db_impl/db_impl_readonly.cc create mode 100644 storage/rocksdb/rocksdb/db/db_impl/db_impl_readonly.h create mode 100644 storage/rocksdb/rocksdb/db/db_impl/db_impl_secondary.cc create mode 100644 storage/rocksdb/rocksdb/db/db_impl/db_impl_secondary.h create mode 100644 storage/rocksdb/rocksdb/db/db_impl/db_impl_write.cc create mode 100644 storage/rocksdb/rocksdb/db/db_impl/db_secondary_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_info_dumper.cc create mode 100644 storage/rocksdb/rocksdb/db/db_info_dumper.h create mode 100644 storage/rocksdb/rocksdb/db/db_inplace_update_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_io_failure_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_iter.cc create mode 100644 storage/rocksdb/rocksdb/db/db_iter.h create mode 100644 storage/rocksdb/rocksdb/db/db_iter_stress_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_iter_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_iterator_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_log_iter_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_memtable_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_merge_operand_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_merge_operator_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_options_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_properties_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_range_del_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_sst_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_statistics_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_table_properties_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_tailing_iter_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_test2.cc create mode 100644 storage/rocksdb/rocksdb/db/db_test_util.cc create mode 100644 storage/rocksdb/rocksdb/db/db_test_util.h create mode 
100644 storage/rocksdb/rocksdb/db/db_universal_compaction_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_wal_test.cc create mode 100644 storage/rocksdb/rocksdb/db/db_write_test.cc create mode 100644 storage/rocksdb/rocksdb/db/dbformat.cc create mode 100644 storage/rocksdb/rocksdb/db/dbformat.h create mode 100644 storage/rocksdb/rocksdb/db/dbformat_test.cc create mode 100644 storage/rocksdb/rocksdb/db/deletefile_test.cc create mode 100644 storage/rocksdb/rocksdb/db/error_handler.cc create mode 100644 storage/rocksdb/rocksdb/db/error_handler.h create mode 100644 storage/rocksdb/rocksdb/db/error_handler_test.cc create mode 100644 storage/rocksdb/rocksdb/db/event_helpers.cc create mode 100644 storage/rocksdb/rocksdb/db/event_helpers.h create mode 100644 storage/rocksdb/rocksdb/db/experimental.cc create mode 100644 storage/rocksdb/rocksdb/db/external_sst_file_basic_test.cc create mode 100644 storage/rocksdb/rocksdb/db/external_sst_file_ingestion_job.cc create mode 100644 storage/rocksdb/rocksdb/db/external_sst_file_ingestion_job.h create mode 100644 storage/rocksdb/rocksdb/db/external_sst_file_test.cc create mode 100644 storage/rocksdb/rocksdb/db/fault_injection_test.cc create mode 100644 storage/rocksdb/rocksdb/db/file_indexer.cc create mode 100644 storage/rocksdb/rocksdb/db/file_indexer.h create mode 100644 storage/rocksdb/rocksdb/db/file_indexer_test.cc create mode 100644 storage/rocksdb/rocksdb/db/filename_test.cc create mode 100644 storage/rocksdb/rocksdb/db/flush_job.cc create mode 100644 storage/rocksdb/rocksdb/db/flush_job.h create mode 100644 storage/rocksdb/rocksdb/db/flush_job_test.cc create mode 100644 storage/rocksdb/rocksdb/db/flush_scheduler.cc create mode 100644 storage/rocksdb/rocksdb/db/flush_scheduler.h create mode 100644 storage/rocksdb/rocksdb/db/forward_iterator.cc create mode 100644 storage/rocksdb/rocksdb/db/forward_iterator.h create mode 100644 storage/rocksdb/rocksdb/db/forward_iterator_bench.cc create mode 100644 
storage/rocksdb/rocksdb/db/import_column_family_job.cc create mode 100644 storage/rocksdb/rocksdb/db/import_column_family_job.h create mode 100644 storage/rocksdb/rocksdb/db/import_column_family_test.cc create mode 100644 storage/rocksdb/rocksdb/db/internal_stats.cc create mode 100644 storage/rocksdb/rocksdb/db/internal_stats.h create mode 100644 storage/rocksdb/rocksdb/db/job_context.h create mode 100644 storage/rocksdb/rocksdb/db/listener_test.cc create mode 100644 storage/rocksdb/rocksdb/db/log_format.h create mode 100644 storage/rocksdb/rocksdb/db/log_reader.cc create mode 100644 storage/rocksdb/rocksdb/db/log_reader.h create mode 100644 storage/rocksdb/rocksdb/db/log_test.cc create mode 100644 storage/rocksdb/rocksdb/db/log_writer.cc create mode 100644 storage/rocksdb/rocksdb/db/log_writer.h create mode 100644 storage/rocksdb/rocksdb/db/logs_with_prep_tracker.cc create mode 100644 storage/rocksdb/rocksdb/db/logs_with_prep_tracker.h create mode 100644 storage/rocksdb/rocksdb/db/lookup_key.h create mode 100644 storage/rocksdb/rocksdb/db/malloc_stats.cc create mode 100644 storage/rocksdb/rocksdb/db/malloc_stats.h create mode 100644 storage/rocksdb/rocksdb/db/manual_compaction_test.cc create mode 100644 storage/rocksdb/rocksdb/db/memtable.cc create mode 100644 storage/rocksdb/rocksdb/db/memtable.h create mode 100644 storage/rocksdb/rocksdb/db/memtable_list.cc create mode 100644 storage/rocksdb/rocksdb/db/memtable_list.h create mode 100644 storage/rocksdb/rocksdb/db/memtable_list_test.cc create mode 100644 storage/rocksdb/rocksdb/db/merge_context.h create mode 100644 storage/rocksdb/rocksdb/db/merge_helper.cc create mode 100644 storage/rocksdb/rocksdb/db/merge_helper.h create mode 100644 storage/rocksdb/rocksdb/db/merge_helper_test.cc create mode 100644 storage/rocksdb/rocksdb/db/merge_operator.cc create mode 100644 storage/rocksdb/rocksdb/db/merge_test.cc create mode 100644 storage/rocksdb/rocksdb/db/obsolete_files_test.cc create mode 100644 
storage/rocksdb/rocksdb/db/options_file_test.cc create mode 100644 storage/rocksdb/rocksdb/db/perf_context_test.cc create mode 100644 storage/rocksdb/rocksdb/db/pinned_iterators_manager.h create mode 100644 storage/rocksdb/rocksdb/db/plain_table_db_test.cc create mode 100644 storage/rocksdb/rocksdb/db/pre_release_callback.h create mode 100644 storage/rocksdb/rocksdb/db/prefix_test.cc create mode 100644 storage/rocksdb/rocksdb/db/range_del_aggregator.cc create mode 100644 storage/rocksdb/rocksdb/db/range_del_aggregator.h create mode 100644 storage/rocksdb/rocksdb/db/range_del_aggregator_bench.cc create mode 100644 storage/rocksdb/rocksdb/db/range_del_aggregator_test.cc create mode 100644 storage/rocksdb/rocksdb/db/range_tombstone_fragmenter.cc create mode 100644 storage/rocksdb/rocksdb/db/range_tombstone_fragmenter.h create mode 100644 storage/rocksdb/rocksdb/db/range_tombstone_fragmenter_test.cc create mode 100644 storage/rocksdb/rocksdb/db/read_callback.h create mode 100644 storage/rocksdb/rocksdb/db/repair.cc create mode 100644 storage/rocksdb/rocksdb/db/repair_test.cc create mode 100644 storage/rocksdb/rocksdb/db/snapshot_checker.h create mode 100644 storage/rocksdb/rocksdb/db/snapshot_impl.cc create mode 100644 storage/rocksdb/rocksdb/db/snapshot_impl.h create mode 100644 storage/rocksdb/rocksdb/db/table_cache.cc create mode 100644 storage/rocksdb/rocksdb/db/table_cache.h create mode 100644 storage/rocksdb/rocksdb/db/table_properties_collector.cc create mode 100644 storage/rocksdb/rocksdb/db/table_properties_collector.h create mode 100644 storage/rocksdb/rocksdb/db/table_properties_collector_test.cc create mode 100644 storage/rocksdb/rocksdb/db/transaction_log_impl.cc create mode 100644 storage/rocksdb/rocksdb/db/transaction_log_impl.h create mode 100644 storage/rocksdb/rocksdb/db/trim_history_scheduler.cc create mode 100644 storage/rocksdb/rocksdb/db/trim_history_scheduler.h create mode 100644 storage/rocksdb/rocksdb/db/version_builder.cc create mode 100644 
storage/rocksdb/rocksdb/db/version_builder.h create mode 100644 storage/rocksdb/rocksdb/db/version_builder_test.cc create mode 100644 storage/rocksdb/rocksdb/db/version_edit.cc create mode 100644 storage/rocksdb/rocksdb/db/version_edit.h create mode 100644 storage/rocksdb/rocksdb/db/version_edit_test.cc create mode 100644 storage/rocksdb/rocksdb/db/version_set.cc create mode 100644 storage/rocksdb/rocksdb/db/version_set.h create mode 100644 storage/rocksdb/rocksdb/db/version_set_test.cc create mode 100644 storage/rocksdb/rocksdb/db/wal_manager.cc create mode 100644 storage/rocksdb/rocksdb/db/wal_manager.h create mode 100644 storage/rocksdb/rocksdb/db/wal_manager_test.cc create mode 100644 storage/rocksdb/rocksdb/db/write_batch.cc create mode 100644 storage/rocksdb/rocksdb/db/write_batch_base.cc create mode 100644 storage/rocksdb/rocksdb/db/write_batch_internal.h create mode 100644 storage/rocksdb/rocksdb/db/write_batch_test.cc create mode 100644 storage/rocksdb/rocksdb/db/write_callback.h create mode 100644 storage/rocksdb/rocksdb/db/write_callback_test.cc create mode 100644 storage/rocksdb/rocksdb/db/write_controller.cc create mode 100644 storage/rocksdb/rocksdb/db/write_controller.h create mode 100644 storage/rocksdb/rocksdb/db/write_controller_test.cc create mode 100644 storage/rocksdb/rocksdb/db/write_thread.cc create mode 100644 storage/rocksdb/rocksdb/db/write_thread.h create mode 100644 storage/rocksdb/rocksdb/db_stress_tool/CMakeLists.txt create mode 100644 storage/rocksdb/rocksdb/db_stress_tool/batched_ops_stress.cc create mode 100644 storage/rocksdb/rocksdb/db_stress_tool/cf_consistency_stress.cc create mode 100644 storage/rocksdb/rocksdb/db_stress_tool/db_stress.cc create mode 100644 storage/rocksdb/rocksdb/db_stress_tool/db_stress_common.cc create mode 100644 storage/rocksdb/rocksdb/db_stress_tool/db_stress_common.h create mode 100644 storage/rocksdb/rocksdb/db_stress_tool/db_stress_driver.cc create mode 100644 
storage/rocksdb/rocksdb/db_stress_tool/db_stress_driver.h create mode 100644 storage/rocksdb/rocksdb/db_stress_tool/db_stress_env_wrapper.h create mode 100644 storage/rocksdb/rocksdb/db_stress_tool/db_stress_gflags.cc create mode 100644 storage/rocksdb/rocksdb/db_stress_tool/db_stress_listener.h create mode 100644 storage/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.cc create mode 100644 storage/rocksdb/rocksdb/db_stress_tool/db_stress_shared_state.h create mode 100644 storage/rocksdb/rocksdb/db_stress_tool/db_stress_stat.h create mode 100644 storage/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.cc create mode 100644 storage/rocksdb/rocksdb/db_stress_tool/db_stress_test_base.h create mode 100644 storage/rocksdb/rocksdb/db_stress_tool/db_stress_tool.cc create mode 100644 storage/rocksdb/rocksdb/db_stress_tool/no_batched_ops_stress.cc create mode 100644 storage/rocksdb/rocksdb/defs.bzl create mode 100644 storage/rocksdb/rocksdb/docs/.gitignore create mode 100644 storage/rocksdb/rocksdb/docs/CNAME create mode 100644 storage/rocksdb/rocksdb/docs/CONTRIBUTING.md create mode 100644 storage/rocksdb/rocksdb/docs/Gemfile create mode 100644 storage/rocksdb/rocksdb/docs/Gemfile.lock create mode 100644 storage/rocksdb/rocksdb/docs/LICENSE-DOCUMENTATION create mode 100644 storage/rocksdb/rocksdb/docs/README.md create mode 100644 storage/rocksdb/rocksdb/docs/TEMPLATE-INFORMATION.md create mode 100644 storage/rocksdb/rocksdb/docs/_config.yml create mode 100644 storage/rocksdb/rocksdb/docs/_data/authors.yml create mode 100644 storage/rocksdb/rocksdb/docs/_data/features.yml create mode 100644 storage/rocksdb/rocksdb/docs/_data/nav.yml create mode 100644 storage/rocksdb/rocksdb/docs/_data/nav_docs.yml create mode 100644 storage/rocksdb/rocksdb/docs/_data/powered_by.yml create mode 100644 storage/rocksdb/rocksdb/docs/_data/powered_by_highlight.yml create mode 100644 storage/rocksdb/rocksdb/docs/_data/promo.yml create mode 100644 storage/rocksdb/rocksdb/docs/_docs/faq.md 
create mode 100644 storage/rocksdb/rocksdb/docs/_docs/getting-started.md create mode 100644 storage/rocksdb/rocksdb/docs/_includes/blog_pagination.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/content/gridblocks.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/content/items/gridblock.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/doc.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/doc_paging.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/footer.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/head.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/header.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/hero.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/home_header.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/katex_import.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/katex_render.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/nav.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/nav/collection_nav.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/nav/collection_nav_group.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/nav/collection_nav_group_item.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/nav/header_nav.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/nav_search.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/plugins/all_share.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/plugins/ascii_cinema.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/plugins/button.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/plugins/github_star.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/plugins/github_watch.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/plugins/google_share.html create mode 100644 
storage/rocksdb/rocksdb/docs/_includes/plugins/iframe.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/plugins/like_button.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/plugins/plugin_row.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/plugins/post_social_plugins.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/plugins/slideshow.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/plugins/twitter_follow.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/plugins/twitter_share.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/post.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/powered_by.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/social_plugins.html create mode 100644 storage/rocksdb/rocksdb/docs/_includes/ui/button.html create mode 100644 storage/rocksdb/rocksdb/docs/_layouts/basic.html create mode 100644 storage/rocksdb/rocksdb/docs/_layouts/blog.html create mode 100644 storage/rocksdb/rocksdb/docs/_layouts/blog_default.html create mode 100644 storage/rocksdb/rocksdb/docs/_layouts/default.html create mode 100644 storage/rocksdb/rocksdb/docs/_layouts/doc_default.html create mode 100644 storage/rocksdb/rocksdb/docs/_layouts/doc_page.html create mode 100644 storage/rocksdb/rocksdb/docs/_layouts/docs.html create mode 100644 storage/rocksdb/rocksdb/docs/_layouts/home.html create mode 100644 storage/rocksdb/rocksdb/docs/_layouts/page.html create mode 100644 storage/rocksdb/rocksdb/docs/_layouts/plain.html create mode 100644 storage/rocksdb/rocksdb/docs/_layouts/post.html create mode 100644 storage/rocksdb/rocksdb/docs/_layouts/redirect.html create mode 100644 storage/rocksdb/rocksdb/docs/_layouts/top-level.html create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2014-03-27-how-to-backup-rocksdb.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2014-03-27-how-to-persist-in-memory-rocksdb-database.markdown create mode 100644 
storage/rocksdb/rocksdb/docs/_posts/2014-04-02-the-1st-rocksdb-local-meetup-held-on-march-27-2014.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2014-04-07-rocksdb-2-8-release.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2014-04-21-indexing-sst-files-for-better-lookup-performance.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2014-05-14-lock.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2014-05-19-rocksdb-3-0-release.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2014-05-22-rocksdb-3-1-release.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2014-06-23-plaintable-a-new-file-format.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2014-06-27-avoid-expensive-locks-in-get.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2014-06-27-rocksdb-3-2-release.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2014-07-29-rocksdb-3-3-release.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2014-09-12-cuckoo.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2014-09-12-new-bloom-filter-format.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2014-09-15-rocksdb-3-5-release.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2015-01-16-migrating-from-leveldb-to-rocksdb-2.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2015-02-24-reading-rocksdb-options-from-a-file.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2015-02-27-write-batch-with-index.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2015-04-22-integrating-rocksdb-with-mongodb-2.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2015-06-12-rocksdb-in-osquery.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2015-07-15-rocksdb-2015-h2-roadmap.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2015-07-17-spatial-indexing-in-rocksdb.markdown 
create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2015-07-22-rocksdb-is-now-available-in-windows-platform.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2015-07-23-dynamic-level.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2015-10-27-getthreadlist.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2015-11-10-use-checkpoints-for-efficient-snapshots.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2015-11-16-analysis-file-read-latency-by-level.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2016-01-29-compaction_pri.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2016-02-24-rocksdb-4-2-release.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2016-02-25-rocksdb-ama.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2016-03-07-rocksdb-options-file.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2016-04-26-rocksdb-4-5-1-released.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2016-07-26-rocksdb-4-8-released.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2016-09-28-rocksdb-4-11-2-released.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2017-01-06-rocksdb-5-0-1-released.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2017-02-07-rocksdb-5-1-2-released.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2017-02-17-bulkoad-ingest-sst-file.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2017-03-02-rocksdb-5-2-1-released.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2017-05-12-partitioned-index-filter.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2017-05-14-core-local-stats.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2017-05-26-rocksdb-5-4-5-released.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2017-06-26-17-level-based-changes.markdown create mode 100644 
storage/rocksdb/rocksdb/docs/_posts/2017-06-29-rocksdb-5-5-1-released.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2017-07-25-rocksdb-5-6-1-released.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2017-08-24-pinnableslice.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2017-08-25-flushwal.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2017-09-28-rocksdb-5-8-released.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2017-12-18-17-auto-tuned-rate-limiter.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2017-12-19-write-prepared-txn.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2018-02-05-rocksdb-5-10-2-released.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2018-08-01-rocksdb-tuning-advisor.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2018-08-23-data-block-hash-index.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2018-11-21-delete-range.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2019-03-08-format-version-4.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_posts/2019-08-15-unordered-write.markdown create mode 100644 storage/rocksdb/rocksdb/docs/_sass/_base.scss create mode 100644 storage/rocksdb/rocksdb/docs/_sass/_blog.scss create mode 100644 storage/rocksdb/rocksdb/docs/_sass/_buttons.scss create mode 100644 storage/rocksdb/rocksdb/docs/_sass/_footer.scss create mode 100644 storage/rocksdb/rocksdb/docs/_sass/_gridBlock.scss create mode 100644 storage/rocksdb/rocksdb/docs/_sass/_header.scss create mode 100644 storage/rocksdb/rocksdb/docs/_sass/_poweredby.scss create mode 100644 storage/rocksdb/rocksdb/docs/_sass/_promo.scss create mode 100644 storage/rocksdb/rocksdb/docs/_sass/_react_docs_nav.scss create mode 100644 storage/rocksdb/rocksdb/docs/_sass/_react_header_nav.scss create mode 100644 storage/rocksdb/rocksdb/docs/_sass/_reset.scss create mode 100644 
storage/rocksdb/rocksdb/docs/_sass/_search.scss create mode 100644 storage/rocksdb/rocksdb/docs/_sass/_slideshow.scss create mode 100644 storage/rocksdb/rocksdb/docs/_sass/_syntax-highlighting.scss create mode 100644 storage/rocksdb/rocksdb/docs/_sass/_tables.scss create mode 100644 storage/rocksdb/rocksdb/docs/_top-level/support.md create mode 100644 storage/rocksdb/rocksdb/docs/blog/all.html create mode 100644 storage/rocksdb/rocksdb/docs/blog/index.html create mode 100644 storage/rocksdb/rocksdb/docs/css/main.scss create mode 100644 storage/rocksdb/rocksdb/docs/doc-type-examples/2016-04-07-blog-post-example.md create mode 100644 storage/rocksdb/rocksdb/docs/doc-type-examples/docs-hello-world.md create mode 100644 storage/rocksdb/rocksdb/docs/doc-type-examples/top-level-example.md create mode 100644 storage/rocksdb/rocksdb/docs/docs/index.html create mode 100644 storage/rocksdb/rocksdb/docs/feed.xml create mode 100644 storage/rocksdb/rocksdb/docs/index.md create mode 100644 storage/rocksdb/rocksdb/docs/static/favicon.png create mode 100644 storage/rocksdb/rocksdb/docs/static/fonts/LatoLatin-Black.woff create mode 100644 storage/rocksdb/rocksdb/docs/static/fonts/LatoLatin-Black.woff2 create mode 100644 storage/rocksdb/rocksdb/docs/static/fonts/LatoLatin-BlackItalic.woff create mode 100644 storage/rocksdb/rocksdb/docs/static/fonts/LatoLatin-BlackItalic.woff2 create mode 100644 storage/rocksdb/rocksdb/docs/static/fonts/LatoLatin-Italic.woff create mode 100644 storage/rocksdb/rocksdb/docs/static/fonts/LatoLatin-Italic.woff2 create mode 100644 storage/rocksdb/rocksdb/docs/static/fonts/LatoLatin-Light.woff create mode 100644 storage/rocksdb/rocksdb/docs/static/fonts/LatoLatin-Light.woff2 create mode 100644 storage/rocksdb/rocksdb/docs/static/fonts/LatoLatin-Regular.woff create mode 100644 storage/rocksdb/rocksdb/docs/static/fonts/LatoLatin-Regular.woff2 create mode 100644 storage/rocksdb/rocksdb/docs/static/images/Resize-of-20140327_200754-300x225.jpg create mode 
100644 storage/rocksdb/rocksdb/docs/static/images/binaryseek.png create mode 100644 storage/rocksdb/rocksdb/docs/static/images/bloom_fp_vs_bpk.png create mode 100644 storage/rocksdb/rocksdb/docs/static/images/compaction/full-range.png create mode 100644 storage/rocksdb/rocksdb/docs/static/images/compaction/l0-l1-contend.png create mode 100644 storage/rocksdb/rocksdb/docs/static/images/compaction/l1-l2-contend.png create mode 100644 storage/rocksdb/rocksdb/docs/static/images/compaction/part-range-old.png create mode 100644 storage/rocksdb/rocksdb/docs/static/images/data-block-hash-index/block-format-binary-seek.png create mode 100644 storage/rocksdb/rocksdb/docs/static/images/data-block-hash-index/block-format-hash-index.png create mode 100644 storage/rocksdb/rocksdb/docs/static/images/data-block-hash-index/hash-index-data-structure.png create mode 100644 storage/rocksdb/rocksdb/docs/static/images/data-block-hash-index/perf-cache-miss.png create mode 100644 storage/rocksdb/rocksdb/docs/static/images/data-block-hash-index/perf-throughput.png create mode 100644 storage/rocksdb/rocksdb/docs/static/images/delrange/delrange_collapsed.png create mode 100644 storage/rocksdb/rocksdb/docs/static/images/delrange/delrange_key_schema.png create mode 100644 storage/rocksdb/rocksdb/docs/static/images/delrange/delrange_sst_blocks.png create mode 100644 storage/rocksdb/rocksdb/docs/static/images/delrange/delrange_uncollapsed.png create mode 100644 storage/rocksdb/rocksdb/docs/static/images/delrange/delrange_write_path.png create mode 100644 storage/rocksdb/rocksdb/docs/static/images/pcache-blockindex.jpg create mode 100644 storage/rocksdb/rocksdb/docs/static/images/pcache-fileindex.jpg create mode 100644 storage/rocksdb/rocksdb/docs/static/images/pcache-filelayout.jpg create mode 100644 storage/rocksdb/rocksdb/docs/static/images/pcache-readiopath.jpg create mode 100644 storage/rocksdb/rocksdb/docs/static/images/pcache-tieredstorage.jpg create mode 100644 
storage/rocksdb/rocksdb/docs/static/images/pcache-writeiopath.jpg create mode 100644 storage/rocksdb/rocksdb/docs/static/images/promo-adapt.svg create mode 100644 storage/rocksdb/rocksdb/docs/static/images/promo-flash.svg create mode 100644 storage/rocksdb/rocksdb/docs/static/images/promo-operations.svg create mode 100644 storage/rocksdb/rocksdb/docs/static/images/promo-performance.svg create mode 100644 storage/rocksdb/rocksdb/docs/static/images/rate-limiter/auto-tuned-write-KBps-series.png create mode 100644 storage/rocksdb/rocksdb/docs/static/images/rate-limiter/write-KBps-cdf.png create mode 100644 storage/rocksdb/rocksdb/docs/static/images/rate-limiter/write-KBps-series.png create mode 100644 storage/rocksdb/rocksdb/docs/static/images/tree_example1.png create mode 100644 storage/rocksdb/rocksdb/docs/static/logo.svg create mode 100644 storage/rocksdb/rocksdb/docs/static/og_image.png create mode 100644 storage/rocksdb/rocksdb/env/composite_env_wrapper.h create mode 100644 storage/rocksdb/rocksdb/env/env.cc create mode 100644 storage/rocksdb/rocksdb/env/env_basic_test.cc create mode 100644 storage/rocksdb/rocksdb/env/env_chroot.cc create mode 100644 storage/rocksdb/rocksdb/env/env_chroot.h create mode 100644 storage/rocksdb/rocksdb/env/env_encryption.cc create mode 100644 storage/rocksdb/rocksdb/env/env_hdfs.cc create mode 100644 storage/rocksdb/rocksdb/env/env_posix.cc create mode 100644 storage/rocksdb/rocksdb/env/env_test.cc create mode 100644 storage/rocksdb/rocksdb/env/file_system.cc create mode 100644 storage/rocksdb/rocksdb/env/fs_posix.cc create mode 100644 storage/rocksdb/rocksdb/env/io_posix.cc create mode 100644 storage/rocksdb/rocksdb/env/io_posix.h create mode 100644 storage/rocksdb/rocksdb/env/mock_env.cc create mode 100644 storage/rocksdb/rocksdb/env/mock_env.h create mode 100644 storage/rocksdb/rocksdb/env/mock_env_test.cc create mode 100644 storage/rocksdb/rocksdb/examples/.gitignore create mode 100644 storage/rocksdb/rocksdb/examples/Makefile 
create mode 100644 storage/rocksdb/rocksdb/examples/README.md create mode 100644 storage/rocksdb/rocksdb/examples/c_simple_example.c create mode 100644 storage/rocksdb/rocksdb/examples/column_families_example.cc create mode 100644 storage/rocksdb/rocksdb/examples/compact_files_example.cc create mode 100644 storage/rocksdb/rocksdb/examples/compaction_filter_example.cc create mode 100644 storage/rocksdb/rocksdb/examples/multi_processes_example.cc create mode 100644 storage/rocksdb/rocksdb/examples/optimistic_transaction_example.cc create mode 100644 storage/rocksdb/rocksdb/examples/options_file_example.cc create mode 100644 storage/rocksdb/rocksdb/examples/rocksdb_option_file_example.ini create mode 100644 storage/rocksdb/rocksdb/examples/simple_example.cc create mode 100644 storage/rocksdb/rocksdb/examples/transaction_example.cc create mode 100644 storage/rocksdb/rocksdb/file/delete_scheduler.cc create mode 100644 storage/rocksdb/rocksdb/file/delete_scheduler.h create mode 100644 storage/rocksdb/rocksdb/file/delete_scheduler_test.cc create mode 100644 storage/rocksdb/rocksdb/file/file_prefetch_buffer.cc create mode 100644 storage/rocksdb/rocksdb/file/file_prefetch_buffer.h create mode 100644 storage/rocksdb/rocksdb/file/file_util.cc create mode 100644 storage/rocksdb/rocksdb/file/file_util.h create mode 100644 storage/rocksdb/rocksdb/file/filename.cc create mode 100644 storage/rocksdb/rocksdb/file/filename.h create mode 100644 storage/rocksdb/rocksdb/file/random_access_file_reader.cc create mode 100644 storage/rocksdb/rocksdb/file/random_access_file_reader.h create mode 100644 storage/rocksdb/rocksdb/file/read_write_util.cc create mode 100644 storage/rocksdb/rocksdb/file/read_write_util.h create mode 100644 storage/rocksdb/rocksdb/file/readahead_raf.cc create mode 100644 storage/rocksdb/rocksdb/file/readahead_raf.h create mode 100644 storage/rocksdb/rocksdb/file/sequence_file_reader.cc create mode 100644 storage/rocksdb/rocksdb/file/sequence_file_reader.h create 
mode 100644 storage/rocksdb/rocksdb/file/sst_file_manager_impl.cc create mode 100644 storage/rocksdb/rocksdb/file/sst_file_manager_impl.h create mode 100644 storage/rocksdb/rocksdb/file/writable_file_writer.cc create mode 100644 storage/rocksdb/rocksdb/file/writable_file_writer.h create mode 100644 storage/rocksdb/rocksdb/hdfs/README create mode 100644 storage/rocksdb/rocksdb/hdfs/env_hdfs.h create mode 100755 storage/rocksdb/rocksdb/hdfs/setup.sh create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/advanced_options.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/c.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/cache.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/cleanable.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/compaction_filter.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/compaction_job_stats.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/comparator.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/concurrent_task_limiter.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/convenience.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/db.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/db_bench_tool.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/db_dump_tool.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/db_stress_tool.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/env.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/env_encryption.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/experimental.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/file_checksum.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/file_system.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/filter_policy.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/flush_block_policy.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/io_status.h 
create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/iostats_context.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/iterator.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/ldb_tool.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/listener.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/memory_allocator.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/memtablerep.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/merge_operator.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/metadata.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/options.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/perf_context.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/perf_level.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/persistent_cache.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/rate_limiter.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/rocksdb_namespace.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/slice.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/slice_transform.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/snapshot.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/sst_dump_tool.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/sst_file_manager.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/sst_file_reader.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/sst_file_writer.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/statistics.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/stats_history.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/status.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/table.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/table_properties.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/thread_status.h create mode 
100644 storage/rocksdb/rocksdb/include/rocksdb/threadpool.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/trace_reader_writer.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/transaction_log.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/types.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/universal_compaction.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/backupable_db.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/checkpoint.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/convenience.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/db_ttl.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/debug.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/env_librados.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/env_mirror.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/info_log_finder.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/ldb_cmd_execute_result.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/leveldb_options.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_custom_library.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/lua/rocks_lua_util.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/memory_util.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/object_registry.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/optimistic_transaction_db.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/option_change_migration.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/options_util.h create mode 100644 
storage/rocksdb/rocksdb/include/rocksdb/utilities/sim_cache.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/stackable_db.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/table_properties_collectors.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/transaction.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/transaction_db_mutex.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/utility_db.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/utilities/write_batch_with_index.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/version.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/wal_filter.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/write_batch.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/write_batch_base.h create mode 100644 storage/rocksdb/rocksdb/include/rocksdb/write_buffer_manager.h create mode 100644 storage/rocksdb/rocksdb/issue_template.md create mode 100644 storage/rocksdb/rocksdb/java/CMakeLists.txt create mode 100644 storage/rocksdb/rocksdb/java/HISTORY-JAVA.md create mode 100644 storage/rocksdb/rocksdb/java/Makefile create mode 100644 storage/rocksdb/rocksdb/java/RELEASE.md create mode 100644 storage/rocksdb/rocksdb/java/benchmark/src/main/java/org/rocksdb/benchmark/DbBenchmark.java create mode 100644 storage/rocksdb/rocksdb/java/crossbuild/Vagrantfile create mode 100755 storage/rocksdb/rocksdb/java/crossbuild/build-linux-alpine.sh create mode 100755 storage/rocksdb/rocksdb/java/crossbuild/build-linux-centos.sh create mode 100755 storage/rocksdb/rocksdb/java/crossbuild/build-linux.sh create mode 100755 storage/rocksdb/rocksdb/java/crossbuild/docker-build-linux-alpine.sh create mode 100755 storage/rocksdb/rocksdb/java/crossbuild/docker-build-linux-centos.sh create mode 100755 
storage/rocksdb/rocksdb/java/jdb_bench.sh create mode 100644 storage/rocksdb/rocksdb/java/jmh/LICENSE-HEADER.txt create mode 100644 storage/rocksdb/rocksdb/java/jmh/README.md create mode 100644 storage/rocksdb/rocksdb/java/jmh/pom.xml create mode 100644 storage/rocksdb/rocksdb/java/jmh/src/main/java/org/rocksdb/jmh/ComparatorBenchmarks.java create mode 100644 storage/rocksdb/rocksdb/java/jmh/src/main/java/org/rocksdb/jmh/GetBenchmarks.java create mode 100644 storage/rocksdb/rocksdb/java/jmh/src/main/java/org/rocksdb/jmh/MultiGetBenchmarks.java create mode 100644 storage/rocksdb/rocksdb/java/jmh/src/main/java/org/rocksdb/jmh/PutBenchmarks.java create mode 100644 storage/rocksdb/rocksdb/java/jmh/src/main/java/org/rocksdb/util/FileUtils.java create mode 100644 storage/rocksdb/rocksdb/java/jmh/src/main/java/org/rocksdb/util/KVUtils.java create mode 100644 storage/rocksdb/rocksdb/java/rocksjni.pom create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/backupablejni.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/backupenginejni.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/cassandra_compactionfilterjni.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/cassandra_value_operator.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/checkpoint.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/clock_cache.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/columnfamilyhandle.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/compact_range_options.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/compaction_filter.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/compaction_filter_factory.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/compaction_filter_factory_jnicallback.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/compaction_filter_factory_jnicallback.h create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/compaction_job_info.cc create mode 100644 
storage/rocksdb/rocksdb/java/rocksjni/compaction_job_stats.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/compaction_options.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/compaction_options_fifo.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/compaction_options_universal.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/comparator.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/comparatorjnicallback.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/comparatorjnicallback.h create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/compression_options.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/env.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/env_options.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/filter.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/ingest_external_file_options.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/iterator.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/jnicallback.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/jnicallback.h create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/loggerjnicallback.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/loggerjnicallback.h create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/lru_cache.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/memory_util.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/memtablejni.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/merge_operator.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/native_comparator_wrapper_test.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/optimistic_transaction_db.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/optimistic_transaction_options.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/options.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/options_util.cc create mode 
100644 storage/rocksdb/rocksdb/java/rocksjni/persistent_cache.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/portal.h create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/ratelimiterjni.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/remove_emptyvalue_compactionfilterjni.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/restorejni.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/rocks_callback_object.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/rocksdb_exception_test.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/rocksjni.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/slice.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/snapshot.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/sst_file_manager.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/sst_file_reader_iterator.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/sst_file_readerjni.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/sst_file_writerjni.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/statistics.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/statisticsjni.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/statisticsjni.h create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/table.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/table_filter.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/table_filter_jnicallback.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/table_filter_jnicallback.h create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/thread_status.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/trace_writer.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/trace_writer_jnicallback.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/trace_writer_jnicallback.h create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/transaction.cc create mode 100644 
storage/rocksdb/rocksdb/java/rocksjni/transaction_db.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/transaction_db_options.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/transaction_log.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/transaction_notifier.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/transaction_notifier_jnicallback.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/transaction_notifier_jnicallback.h create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/transaction_options.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/ttl.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/wal_filter.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/wal_filter_jnicallback.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/wal_filter_jnicallback.h create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/write_batch.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/write_batch_test.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/write_batch_with_index.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/write_buffer_manager.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/writebatchhandlerjnicallback.cc create mode 100644 storage/rocksdb/rocksdb/java/rocksjni/writebatchhandlerjnicallback.h create mode 100644 storage/rocksdb/rocksdb/java/samples/src/main/java/OptimisticTransactionSample.java create mode 100644 storage/rocksdb/rocksdb/java/samples/src/main/java/RocksDBColumnFamilySample.java create mode 100644 storage/rocksdb/rocksdb/java/samples/src/main/java/RocksDBSample.java create mode 100644 storage/rocksdb/rocksdb/java/samples/src/main/java/TransactionSample.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AbstractCompactionFilter.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AbstractCompactionFilterFactory.java create mode 100644 
storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AbstractComparator.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AbstractComparatorJniBridge.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AbstractImmutableNativeReference.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AbstractMutableOptions.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AbstractNativeReference.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AbstractRocksIterator.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AbstractSlice.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AbstractTableFilter.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AbstractTraceWriter.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AbstractTransactionNotifier.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AbstractWalFilter.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AbstractWriteBatch.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AccessHint.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AdvancedColumnFamilyOptionsInterface.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/AdvancedMutableColumnFamilyOptionsInterface.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/BackupEngine.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/BackupInfo.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/BackupableDBOptions.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/BlockBasedTableConfig.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/BloomFilter.java 
create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/BuiltinComparator.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/Cache.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/CassandraCompactionFilter.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/CassandraValueMergeOperator.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/Checkpoint.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ChecksumType.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ClockCache.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ColumnFamilyDescriptor.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ColumnFamilyHandle.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ColumnFamilyMetaData.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ColumnFamilyOptions.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ColumnFamilyOptionsInterface.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/CompactRangeOptions.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/CompactionJobInfo.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/CompactionJobStats.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/CompactionOptions.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/CompactionOptionsFIFO.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/CompactionOptionsUniversal.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/CompactionPriority.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/CompactionReason.java create mode 100644 
storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/CompactionStopStyle.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/CompactionStyle.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ComparatorOptions.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ComparatorType.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/CompressionOptions.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/CompressionType.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/DBOptions.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/DBOptionsInterface.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/DataBlockIndexType.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/DbPath.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/DirectSlice.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/EncodingType.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/Env.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/EnvOptions.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/Experimental.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/Filter.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/FlushOptions.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/HashSkipListMemTableConfig.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/HdfsEnv.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/HistogramData.java create mode 100644 
storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/HistogramType.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/Holder.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/IndexType.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/InfoLogLevel.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/IngestExternalFileOptions.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/LRUCache.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/LevelMetaData.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/LiveFileMetaData.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/LogFile.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/Logger.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/MemTableConfig.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/MemoryUsageType.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/MemoryUtil.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/MergeOperator.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/MutableColumnFamilyOptions.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/MutableColumnFamilyOptionsInterface.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/MutableDBOptions.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/MutableDBOptionsInterface.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/MutableOptionKey.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/MutableOptionValue.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/NativeComparatorWrapper.java create mode 
100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/NativeLibraryLoader.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/OperationStage.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/OperationType.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/OptimisticTransactionDB.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/OptimisticTransactionOptions.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/Options.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/OptionsUtil.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/PersistentCache.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/PlainTableConfig.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/Priority.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/Range.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/RateLimiter.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/RateLimiterMode.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ReadOptions.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ReadTier.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/RemoveEmptyValueCompactionFilter.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/RestoreOptions.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ReusedSynchronisationType.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/RocksCallbackObject.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/RocksDB.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/RocksDBException.java create mode 
100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/RocksEnv.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/RocksIterator.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/RocksIteratorInterface.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/RocksMemEnv.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/RocksMutableObject.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/RocksObject.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/SizeApproximationFlag.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/SkipListMemTableConfig.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/Slice.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/Snapshot.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/SstFileManager.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/SstFileMetaData.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/SstFileReader.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/SstFileReaderIterator.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/SstFileWriter.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/StateType.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/Statistics.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/StatisticsCollector.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/StatisticsCollectorCallback.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/StatsCollectorInput.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/StatsLevel.java create mode 100644 
storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/Status.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/StringAppendOperator.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TableFilter.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TableFormatConfig.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TableProperties.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ThreadStatus.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/ThreadType.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TickerType.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TimedEnv.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TraceOptions.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TraceWriter.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/Transaction.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TransactionDB.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TransactionDBOptions.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TransactionLogIterator.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TransactionOptions.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TransactionalDB.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TransactionalOptions.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TtlDB.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/TxnDBWritePolicy.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/UInt64AddOperator.java create mode 100644 
storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/VectorMemTableConfig.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/WALRecoveryMode.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/WBWIRocksIterator.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/WalFileType.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/WalFilter.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/WalProcessingOption.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/WriteBatch.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/WriteBatchInterface.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/WriteBatchWithIndex.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/WriteBufferManager.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/WriteOptions.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/util/ByteUtil.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/util/BytewiseComparator.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/util/Environment.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/util/IntComparator.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/util/ReverseBytewiseComparator.java create mode 100644 storage/rocksdb/rocksdb/java/src/main/java/org/rocksdb/util/SizeUnit.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/AbstractTransactionTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/BackupEngineTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/BackupableDBOptionsTest.java create mode 100644 
storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/BlockBasedTableConfigTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/BuiltinComparatorTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/CheckPointTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/ClockCacheTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/ColumnFamilyOptionsTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/ColumnFamilyTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/CompactRangeOptionsTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/CompactionFilterFactoryTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/CompactionJobInfoTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/CompactionJobStatsTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/CompactionOptionsFIFOTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/CompactionOptionsTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/CompactionOptionsUniversalTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/CompactionPriorityTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/CompactionStopStyleTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/ComparatorOptionsTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/CompressionOptionsTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/CompressionTypesTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/DBOptionsTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/DefaultEnvTest.java 
create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/DirectSliceTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/EnvOptionsTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/FilterTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/FlushOptionsTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/FlushTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/HdfsEnvTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/InfoLogLevelTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/IngestExternalFileOptionsTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/KeyMayExistTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/LRUCacheTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/LoggerTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/MemTableTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/MemoryUtilTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/MergeTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/MixedOptionsTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/MutableColumnFamilyOptionsTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/MutableDBOptionsTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/NativeComparatorWrapperTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/NativeLibraryLoaderTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/OptimisticTransactionDBTest.java create mode 100644 
storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/OptimisticTransactionOptionsTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/OptimisticTransactionTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/OptionsTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/OptionsUtilTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/PlainTableConfigTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/PlatformRandomHelper.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/RateLimiterTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/ReadOnlyTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/ReadOptionsTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/RocksDBExceptionTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/RocksDBTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/RocksIteratorTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/RocksMemEnvTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/RocksNativeLibraryResource.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/SliceTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/SnapshotTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/SstFileManagerTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/SstFileReaderTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/SstFileWriterTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/StatisticsCollectorTest.java create mode 100644 
storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/StatisticsTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/StatsCallbackMock.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/TableFilterTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/TimedEnvTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/TransactionDBOptionsTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/TransactionDBTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/TransactionLogIteratorTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/TransactionOptionsTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/TransactionTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/TtlDBTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/Types.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/WALRecoveryModeTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/WalFilterTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/WriteBatchHandlerTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/WriteBatchTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/WriteBatchThreadedTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/WriteBatchWithIndexTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/WriteOptionsTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/test/RemoveEmptyValueCompactionFilterFactory.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/test/RocksJunitRunner.java create mode 100644 
storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/util/BytewiseComparatorIntTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/util/BytewiseComparatorTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/util/CapturingWriteBatchHandler.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/util/EnvironmentTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/util/IntComparatorTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/util/JNIComparatorTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/util/ReverseBytewiseComparatorIntTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/util/SizeUnitTest.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/util/TestUtil.java create mode 100644 storage/rocksdb/rocksdb/java/src/test/java/org/rocksdb/util/WriteBatchGetter.java create mode 100644 storage/rocksdb/rocksdb/logging/auto_roll_logger.cc create mode 100644 storage/rocksdb/rocksdb/logging/auto_roll_logger.h create mode 100644 storage/rocksdb/rocksdb/logging/auto_roll_logger_test.cc create mode 100644 storage/rocksdb/rocksdb/logging/env_logger.h create mode 100644 storage/rocksdb/rocksdb/logging/env_logger_test.cc create mode 100644 storage/rocksdb/rocksdb/logging/event_logger.cc create mode 100644 storage/rocksdb/rocksdb/logging/event_logger.h create mode 100644 storage/rocksdb/rocksdb/logging/event_logger_test.cc create mode 100644 storage/rocksdb/rocksdb/logging/log_buffer.cc create mode 100644 storage/rocksdb/rocksdb/logging/log_buffer.h create mode 100644 storage/rocksdb/rocksdb/logging/logging.h create mode 100644 storage/rocksdb/rocksdb/logging/posix_logger.h create mode 100644 storage/rocksdb/rocksdb/memory/allocator.h create mode 100644 storage/rocksdb/rocksdb/memory/arena.cc create mode 100644 
storage/rocksdb/rocksdb/memory/arena.h create mode 100644 storage/rocksdb/rocksdb/memory/arena_test.cc create mode 100644 storage/rocksdb/rocksdb/memory/concurrent_arena.cc create mode 100644 storage/rocksdb/rocksdb/memory/concurrent_arena.h create mode 100644 storage/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.cc create mode 100644 storage/rocksdb/rocksdb/memory/jemalloc_nodump_allocator.h create mode 100644 storage/rocksdb/rocksdb/memory/memory_allocator.h create mode 100644 storage/rocksdb/rocksdb/memory/memory_usage.h create mode 100644 storage/rocksdb/rocksdb/memtable/alloc_tracker.cc create mode 100644 storage/rocksdb/rocksdb/memtable/hash_linklist_rep.cc create mode 100644 storage/rocksdb/rocksdb/memtable/hash_linklist_rep.h create mode 100644 storage/rocksdb/rocksdb/memtable/hash_skiplist_rep.cc create mode 100644 storage/rocksdb/rocksdb/memtable/hash_skiplist_rep.h create mode 100644 storage/rocksdb/rocksdb/memtable/inlineskiplist.h create mode 100644 storage/rocksdb/rocksdb/memtable/inlineskiplist_test.cc create mode 100644 storage/rocksdb/rocksdb/memtable/memtablerep_bench.cc create mode 100644 storage/rocksdb/rocksdb/memtable/skiplist.h create mode 100644 storage/rocksdb/rocksdb/memtable/skiplist_test.cc create mode 100644 storage/rocksdb/rocksdb/memtable/skiplistrep.cc create mode 100644 storage/rocksdb/rocksdb/memtable/stl_wrappers.h create mode 100644 storage/rocksdb/rocksdb/memtable/vectorrep.cc create mode 100644 storage/rocksdb/rocksdb/memtable/write_buffer_manager.cc create mode 100644 storage/rocksdb/rocksdb/memtable/write_buffer_manager_test.cc create mode 100644 storage/rocksdb/rocksdb/monitoring/file_read_sample.h create mode 100644 storage/rocksdb/rocksdb/monitoring/histogram.cc create mode 100644 storage/rocksdb/rocksdb/monitoring/histogram.h create mode 100644 storage/rocksdb/rocksdb/monitoring/histogram_test.cc create mode 100644 storage/rocksdb/rocksdb/monitoring/histogram_windowing.cc create mode 100644 
storage/rocksdb/rocksdb/monitoring/histogram_windowing.h create mode 100644 storage/rocksdb/rocksdb/monitoring/in_memory_stats_history.cc create mode 100644 storage/rocksdb/rocksdb/monitoring/in_memory_stats_history.h create mode 100644 storage/rocksdb/rocksdb/monitoring/instrumented_mutex.cc create mode 100644 storage/rocksdb/rocksdb/monitoring/instrumented_mutex.h create mode 100644 storage/rocksdb/rocksdb/monitoring/iostats_context.cc create mode 100644 storage/rocksdb/rocksdb/monitoring/iostats_context_imp.h create mode 100644 storage/rocksdb/rocksdb/monitoring/iostats_context_test.cc create mode 100644 storage/rocksdb/rocksdb/monitoring/perf_context.cc create mode 100644 storage/rocksdb/rocksdb/monitoring/perf_context_imp.h create mode 100644 storage/rocksdb/rocksdb/monitoring/perf_level.cc create mode 100644 storage/rocksdb/rocksdb/monitoring/perf_level_imp.h create mode 100644 storage/rocksdb/rocksdb/monitoring/perf_step_timer.h create mode 100644 storage/rocksdb/rocksdb/monitoring/persistent_stats_history.cc create mode 100644 storage/rocksdb/rocksdb/monitoring/persistent_stats_history.h create mode 100644 storage/rocksdb/rocksdb/monitoring/statistics.cc create mode 100644 storage/rocksdb/rocksdb/monitoring/statistics.h create mode 100644 storage/rocksdb/rocksdb/monitoring/statistics_test.cc create mode 100644 storage/rocksdb/rocksdb/monitoring/stats_history_test.cc create mode 100644 storage/rocksdb/rocksdb/monitoring/thread_status_impl.cc create mode 100644 storage/rocksdb/rocksdb/monitoring/thread_status_updater.cc create mode 100644 storage/rocksdb/rocksdb/monitoring/thread_status_updater.h create mode 100644 storage/rocksdb/rocksdb/monitoring/thread_status_updater_debug.cc create mode 100644 storage/rocksdb/rocksdb/monitoring/thread_status_util.cc create mode 100644 storage/rocksdb/rocksdb/monitoring/thread_status_util.h create mode 100644 storage/rocksdb/rocksdb/monitoring/thread_status_util_debug.cc create mode 100644 
storage/rocksdb/rocksdb/options/cf_options.cc create mode 100644 storage/rocksdb/rocksdb/options/cf_options.h create mode 100644 storage/rocksdb/rocksdb/options/db_options.cc create mode 100644 storage/rocksdb/rocksdb/options/db_options.h create mode 100644 storage/rocksdb/rocksdb/options/options.cc create mode 100644 storage/rocksdb/rocksdb/options/options_helper.cc create mode 100644 storage/rocksdb/rocksdb/options/options_helper.h create mode 100644 storage/rocksdb/rocksdb/options/options_parser.cc create mode 100644 storage/rocksdb/rocksdb/options/options_parser.h create mode 100644 storage/rocksdb/rocksdb/options/options_sanity_check.cc create mode 100644 storage/rocksdb/rocksdb/options/options_sanity_check.h create mode 100644 storage/rocksdb/rocksdb/options/options_settable_test.cc create mode 100644 storage/rocksdb/rocksdb/options/options_test.cc create mode 100644 storage/rocksdb/rocksdb/port/README create mode 100644 storage/rocksdb/rocksdb/port/jemalloc_helper.h create mode 100644 storage/rocksdb/rocksdb/port/likely.h create mode 100644 storage/rocksdb/rocksdb/port/malloc.h create mode 100644 storage/rocksdb/rocksdb/port/port.h create mode 100644 storage/rocksdb/rocksdb/port/port_dirent.h create mode 100644 storage/rocksdb/rocksdb/port/port_example.h create mode 100644 storage/rocksdb/rocksdb/port/port_posix.cc create mode 100644 storage/rocksdb/rocksdb/port/port_posix.h create mode 100644 storage/rocksdb/rocksdb/port/stack_trace.cc create mode 100644 storage/rocksdb/rocksdb/port/stack_trace.h create mode 100644 storage/rocksdb/rocksdb/port/sys_time.h create mode 100644 storage/rocksdb/rocksdb/port/util_logger.h create mode 100644 storage/rocksdb/rocksdb/port/win/env_default.cc create mode 100644 storage/rocksdb/rocksdb/port/win/env_win.cc create mode 100644 storage/rocksdb/rocksdb/port/win/env_win.h create mode 100644 storage/rocksdb/rocksdb/port/win/io_win.cc create mode 100644 storage/rocksdb/rocksdb/port/win/io_win.h create mode 100644 
storage/rocksdb/rocksdb/port/win/port_win.cc create mode 100644 storage/rocksdb/rocksdb/port/win/port_win.h create mode 100644 storage/rocksdb/rocksdb/port/win/win_jemalloc.cc create mode 100644 storage/rocksdb/rocksdb/port/win/win_logger.cc create mode 100644 storage/rocksdb/rocksdb/port/win/win_logger.h create mode 100644 storage/rocksdb/rocksdb/port/win/win_thread.cc create mode 100644 storage/rocksdb/rocksdb/port/win/win_thread.h create mode 100644 storage/rocksdb/rocksdb/port/win/xpress_win.cc create mode 100644 storage/rocksdb/rocksdb/port/win/xpress_win.h create mode 100644 storage/rocksdb/rocksdb/port/xpress.h create mode 100644 storage/rocksdb/rocksdb/src.mk create mode 100644 storage/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.cc create mode 100644 storage/rocksdb/rocksdb/table/adaptive/adaptive_table_factory.h create mode 100644 storage/rocksdb/rocksdb/table/block_based/block.cc create mode 100644 storage/rocksdb/rocksdb/table/block_based/block.h create mode 100644 storage/rocksdb/rocksdb/table/block_based/block_based_filter_block.cc create mode 100644 storage/rocksdb/rocksdb/table/block_based/block_based_filter_block.h create mode 100644 storage/rocksdb/rocksdb/table/block_based/block_based_filter_block_test.cc create mode 100644 storage/rocksdb/rocksdb/table/block_based/block_based_table_builder.cc create mode 100644 storage/rocksdb/rocksdb/table/block_based/block_based_table_builder.h create mode 100644 storage/rocksdb/rocksdb/table/block_based/block_based_table_factory.cc create mode 100644 storage/rocksdb/rocksdb/table/block_based/block_based_table_factory.h create mode 100644 storage/rocksdb/rocksdb/table/block_based/block_based_table_reader.cc create mode 100644 storage/rocksdb/rocksdb/table/block_based/block_based_table_reader.h create mode 100644 storage/rocksdb/rocksdb/table/block_based/block_builder.cc create mode 100644 storage/rocksdb/rocksdb/table/block_based/block_builder.h create mode 100644 
storage/rocksdb/rocksdb/table/block_based/block_prefix_index.cc create mode 100644 storage/rocksdb/rocksdb/table/block_based/block_prefix_index.h create mode 100644 storage/rocksdb/rocksdb/table/block_based/block_test.cc create mode 100644 storage/rocksdb/rocksdb/table/block_based/block_type.h create mode 100644 storage/rocksdb/rocksdb/table/block_based/cachable_entry.h create mode 100644 storage/rocksdb/rocksdb/table/block_based/data_block_footer.cc create mode 100644 storage/rocksdb/rocksdb/table/block_based/data_block_footer.h create mode 100644 storage/rocksdb/rocksdb/table/block_based/data_block_hash_index.cc create mode 100644 storage/rocksdb/rocksdb/table/block_based/data_block_hash_index.h create mode 100644 storage/rocksdb/rocksdb/table/block_based/data_block_hash_index_test.cc create mode 100644 storage/rocksdb/rocksdb/table/block_based/filter_block.h create mode 100644 storage/rocksdb/rocksdb/table/block_based/filter_block_reader_common.cc create mode 100644 storage/rocksdb/rocksdb/table/block_based/filter_block_reader_common.h create mode 100644 storage/rocksdb/rocksdb/table/block_based/filter_policy.cc create mode 100644 storage/rocksdb/rocksdb/table/block_based/filter_policy_internal.h create mode 100644 storage/rocksdb/rocksdb/table/block_based/flush_block_policy.cc create mode 100644 storage/rocksdb/rocksdb/table/block_based/flush_block_policy.h create mode 100644 storage/rocksdb/rocksdb/table/block_based/full_filter_block.cc create mode 100644 storage/rocksdb/rocksdb/table/block_based/full_filter_block.h create mode 100644 storage/rocksdb/rocksdb/table/block_based/full_filter_block_test.cc create mode 100644 storage/rocksdb/rocksdb/table/block_based/index_builder.cc create mode 100644 storage/rocksdb/rocksdb/table/block_based/index_builder.h create mode 100644 storage/rocksdb/rocksdb/table/block_based/mock_block_based_table.h create mode 100644 storage/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.cc create mode 100644 
storage/rocksdb/rocksdb/table/block_based/parsed_full_filter_block.h create mode 100644 storage/rocksdb/rocksdb/table/block_based/partitioned_filter_block.cc create mode 100644 storage/rocksdb/rocksdb/table/block_based/partitioned_filter_block.h create mode 100644 storage/rocksdb/rocksdb/table/block_based/partitioned_filter_block_test.cc create mode 100644 storage/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.cc create mode 100644 storage/rocksdb/rocksdb/table/block_based/uncompression_dict_reader.h create mode 100644 storage/rocksdb/rocksdb/table/block_fetcher.cc create mode 100644 storage/rocksdb/rocksdb/table/block_fetcher.h create mode 100644 storage/rocksdb/rocksdb/table/cleanable_test.cc create mode 100644 storage/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.cc create mode 100644 storage/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder.h create mode 100644 storage/rocksdb/rocksdb/table/cuckoo/cuckoo_table_builder_test.cc create mode 100644 storage/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.cc create mode 100644 storage/rocksdb/rocksdb/table/cuckoo/cuckoo_table_factory.h create mode 100644 storage/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.cc create mode 100644 storage/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader.h create mode 100644 storage/rocksdb/rocksdb/table/cuckoo/cuckoo_table_reader_test.cc create mode 100644 storage/rocksdb/rocksdb/table/format.cc create mode 100644 storage/rocksdb/rocksdb/table/format.h create mode 100644 storage/rocksdb/rocksdb/table/get_context.cc create mode 100644 storage/rocksdb/rocksdb/table/get_context.h create mode 100644 storage/rocksdb/rocksdb/table/internal_iterator.h create mode 100644 storage/rocksdb/rocksdb/table/iter_heap.h create mode 100644 storage/rocksdb/rocksdb/table/iterator.cc create mode 100644 storage/rocksdb/rocksdb/table/iterator_wrapper.h create mode 100644 storage/rocksdb/rocksdb/table/merger_test.cc create mode 100644 storage/rocksdb/rocksdb/table/merging_iterator.cc 
create mode 100644 storage/rocksdb/rocksdb/table/merging_iterator.h create mode 100644 storage/rocksdb/rocksdb/table/meta_blocks.cc create mode 100644 storage/rocksdb/rocksdb/table/meta_blocks.h create mode 100644 storage/rocksdb/rocksdb/table/mock_table.cc create mode 100644 storage/rocksdb/rocksdb/table/mock_table.h create mode 100644 storage/rocksdb/rocksdb/table/multiget_context.h create mode 100644 storage/rocksdb/rocksdb/table/persistent_cache_helper.cc create mode 100644 storage/rocksdb/rocksdb/table/persistent_cache_helper.h create mode 100644 storage/rocksdb/rocksdb/table/persistent_cache_options.h create mode 100644 storage/rocksdb/rocksdb/table/plain/plain_table_bloom.cc create mode 100644 storage/rocksdb/rocksdb/table/plain/plain_table_bloom.h create mode 100644 storage/rocksdb/rocksdb/table/plain/plain_table_builder.cc create mode 100644 storage/rocksdb/rocksdb/table/plain/plain_table_builder.h create mode 100644 storage/rocksdb/rocksdb/table/plain/plain_table_factory.cc create mode 100644 storage/rocksdb/rocksdb/table/plain/plain_table_factory.h create mode 100644 storage/rocksdb/rocksdb/table/plain/plain_table_index.cc create mode 100644 storage/rocksdb/rocksdb/table/plain/plain_table_index.h create mode 100644 storage/rocksdb/rocksdb/table/plain/plain_table_key_coding.cc create mode 100644 storage/rocksdb/rocksdb/table/plain/plain_table_key_coding.h create mode 100644 storage/rocksdb/rocksdb/table/plain/plain_table_reader.cc create mode 100644 storage/rocksdb/rocksdb/table/plain/plain_table_reader.h create mode 100644 storage/rocksdb/rocksdb/table/scoped_arena_iterator.h create mode 100644 storage/rocksdb/rocksdb/table/sst_file_reader.cc create mode 100644 storage/rocksdb/rocksdb/table/sst_file_reader_test.cc create mode 100644 storage/rocksdb/rocksdb/table/sst_file_writer.cc create mode 100644 storage/rocksdb/rocksdb/table/sst_file_writer_collectors.h create mode 100644 storage/rocksdb/rocksdb/table/table_builder.h create mode 100644 
storage/rocksdb/rocksdb/table/table_properties.cc create mode 100644 storage/rocksdb/rocksdb/table/table_properties_internal.h create mode 100644 storage/rocksdb/rocksdb/table/table_reader.h create mode 100644 storage/rocksdb/rocksdb/table/table_reader_bench.cc create mode 100644 storage/rocksdb/rocksdb/table/table_reader_caller.h create mode 100644 storage/rocksdb/rocksdb/table/table_test.cc create mode 100644 storage/rocksdb/rocksdb/table/two_level_iterator.cc create mode 100644 storage/rocksdb/rocksdb/table/two_level_iterator.h create mode 100644 storage/rocksdb/rocksdb/test_util/fault_injection_test_env.cc create mode 100644 storage/rocksdb/rocksdb/test_util/fault_injection_test_env.h create mode 100644 storage/rocksdb/rocksdb/test_util/mock_time_env.h create mode 100644 storage/rocksdb/rocksdb/test_util/sync_point.cc create mode 100644 storage/rocksdb/rocksdb/test_util/sync_point.h create mode 100644 storage/rocksdb/rocksdb/test_util/sync_point_impl.cc create mode 100644 storage/rocksdb/rocksdb/test_util/sync_point_impl.h create mode 100644 storage/rocksdb/rocksdb/test_util/testharness.cc create mode 100644 storage/rocksdb/rocksdb/test_util/testharness.h create mode 100644 storage/rocksdb/rocksdb/test_util/testutil.cc create mode 100644 storage/rocksdb/rocksdb/test_util/testutil.h create mode 100644 storage/rocksdb/rocksdb/test_util/transaction_test_util.cc create mode 100644 storage/rocksdb/rocksdb/test_util/transaction_test_util.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/CPortability.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/ConstexprMath.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/Indestructible.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/Optional.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/Portability.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/ScopeGuard.h create mode 100644 
storage/rocksdb/rocksdb/third-party/folly/folly/Traits.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/Unit.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/Utility.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/chrono/Hardware.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/container/Array.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/detail/Futex-inl.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.cpp create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/detail/Futex.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/functional/Invoke.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/hash/Hash.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/lang/Align.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/lang/Bits.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/lang/Launder.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/portability/Asm.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/portability/SysSyscall.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/portability/SysTypes.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification-inl.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.cpp create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicNotification.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil-inl.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/synchronization/AtomicUtil.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/synchronization/Baton.h create mode 100644 
storage/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex-inl.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.cpp create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutex.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/synchronization/DistributedMutexSpecializations.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.cpp create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/synchronization/ParkingLot.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/synchronization/WaitOptions.cpp create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/synchronization/WaitOptions.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/InlineFunctionRef.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable-inl.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/ProxyLockable.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Sleeper.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/synchronization/detail/Spin.h create mode 100644 storage/rocksdb/rocksdb/third-party/folly/folly/synchronization/test/DistributedMutexTest.cpp create mode 100644 storage/rocksdb/rocksdb/third-party/gtest-1.8.1/fused-src/gtest/CMakeLists.txt create mode 100644 storage/rocksdb/rocksdb/third-party/gtest-1.8.1/fused-src/gtest/gtest-all.cc create mode 100644 storage/rocksdb/rocksdb/third-party/gtest-1.8.1/fused-src/gtest/gtest.h create mode 100644 storage/rocksdb/rocksdb/third-party/gtest-1.8.1/fused-src/gtest/gtest_main.cc create mode 100644 storage/rocksdb/rocksdb/thirdparty.inc create mode 100644 storage/rocksdb/rocksdb/tools/CMakeLists.txt create mode 100644 storage/rocksdb/rocksdb/tools/Dockerfile 
create mode 100644 storage/rocksdb/rocksdb/tools/advisor/README.md create mode 100644 storage/rocksdb/rocksdb/tools/advisor/advisor/__init__.py create mode 100644 storage/rocksdb/rocksdb/tools/advisor/advisor/bench_runner.py create mode 100644 storage/rocksdb/rocksdb/tools/advisor/advisor/config_optimizer_example.py create mode 100644 storage/rocksdb/rocksdb/tools/advisor/advisor/db_bench_runner.py create mode 100644 storage/rocksdb/rocksdb/tools/advisor/advisor/db_config_optimizer.py create mode 100644 storage/rocksdb/rocksdb/tools/advisor/advisor/db_log_parser.py create mode 100644 storage/rocksdb/rocksdb/tools/advisor/advisor/db_options_parser.py create mode 100755 storage/rocksdb/rocksdb/tools/advisor/advisor/db_stats_fetcher.py create mode 100644 storage/rocksdb/rocksdb/tools/advisor/advisor/db_timeseries_parser.py create mode 100644 storage/rocksdb/rocksdb/tools/advisor/advisor/ini_parser.py create mode 100644 storage/rocksdb/rocksdb/tools/advisor/advisor/rule_parser.py create mode 100644 storage/rocksdb/rocksdb/tools/advisor/advisor/rule_parser_example.py create mode 100644 storage/rocksdb/rocksdb/tools/advisor/advisor/rules.ini create mode 100644 storage/rocksdb/rocksdb/tools/advisor/test/__init__.py create mode 100644 storage/rocksdb/rocksdb/tools/advisor/test/input_files/LOG-0 create mode 100644 storage/rocksdb/rocksdb/tools/advisor/test/input_files/LOG-1 create mode 100644 storage/rocksdb/rocksdb/tools/advisor/test/input_files/OPTIONS-000005 create mode 100644 storage/rocksdb/rocksdb/tools/advisor/test/input_files/log_stats_parser_keys_ts create mode 100644 storage/rocksdb/rocksdb/tools/advisor/test/input_files/rules_err1.ini create mode 100644 storage/rocksdb/rocksdb/tools/advisor/test/input_files/rules_err2.ini create mode 100644 storage/rocksdb/rocksdb/tools/advisor/test/input_files/rules_err3.ini create mode 100644 storage/rocksdb/rocksdb/tools/advisor/test/input_files/rules_err4.ini create mode 100644 
storage/rocksdb/rocksdb/tools/advisor/test/input_files/test_rules.ini create mode 100644 storage/rocksdb/rocksdb/tools/advisor/test/input_files/triggered_rules.ini create mode 100644 storage/rocksdb/rocksdb/tools/advisor/test/test_db_bench_runner.py create mode 100644 storage/rocksdb/rocksdb/tools/advisor/test/test_db_log_parser.py create mode 100644 storage/rocksdb/rocksdb/tools/advisor/test/test_db_options_parser.py create mode 100644 storage/rocksdb/rocksdb/tools/advisor/test/test_db_stats_fetcher.py create mode 100644 storage/rocksdb/rocksdb/tools/advisor/test/test_rule_parser.py create mode 100755 storage/rocksdb/rocksdb/tools/analyze_txn_stress_test.sh create mode 100755 storage/rocksdb/rocksdb/tools/auto_sanity_test.sh create mode 100755 storage/rocksdb/rocksdb/tools/benchmark.sh create mode 100755 storage/rocksdb/rocksdb/tools/benchmark_leveldb.sh create mode 100644 storage/rocksdb/rocksdb/tools/blob_dump.cc create mode 100644 storage/rocksdb/rocksdb/tools/block_cache_analyzer/__init__.py create mode 100644 storage/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.py create mode 100644 storage/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim.sh create mode 100644 storage/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_pysim_test.py create mode 100644 storage/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.cc create mode 100644 storage/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer.h create mode 100644 storage/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_plot.py create mode 100644 storage/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_test.cc create mode 100644 storage/rocksdb/rocksdb/tools/block_cache_analyzer/block_cache_trace_analyzer_tool.cc create mode 100644 storage/rocksdb/rocksdb/tools/check_all_python.py create mode 100755 storage/rocksdb/rocksdb/tools/check_format_compatible.sh create mode 100644 
storage/rocksdb/rocksdb/tools/db_bench.cc create mode 100644 storage/rocksdb/rocksdb/tools/db_bench_tool.cc create mode 100644 storage/rocksdb/rocksdb/tools/db_bench_tool_test.cc create mode 100644 storage/rocksdb/rocksdb/tools/db_crashtest.py create mode 100644 storage/rocksdb/rocksdb/tools/db_repl_stress.cc create mode 100644 storage/rocksdb/rocksdb/tools/db_sanity_test.cc create mode 100755 storage/rocksdb/rocksdb/tools/dbench_monitor create mode 100644 storage/rocksdb/rocksdb/tools/dump/db_dump_tool.cc create mode 100644 storage/rocksdb/rocksdb/tools/dump/rocksdb_dump.cc create mode 100644 storage/rocksdb/rocksdb/tools/dump/rocksdb_undump.cc create mode 100755 storage/rocksdb/rocksdb/tools/generate_random_db.sh create mode 100755 storage/rocksdb/rocksdb/tools/ingest_external_sst.sh create mode 100644 storage/rocksdb/rocksdb/tools/ldb.cc create mode 100644 storage/rocksdb/rocksdb/tools/ldb_cmd.cc create mode 100644 storage/rocksdb/rocksdb/tools/ldb_cmd_impl.h create mode 100644 storage/rocksdb/rocksdb/tools/ldb_cmd_test.cc create mode 100644 storage/rocksdb/rocksdb/tools/ldb_test.py create mode 100644 storage/rocksdb/rocksdb/tools/ldb_tool.cc create mode 100755 storage/rocksdb/rocksdb/tools/pflag create mode 100644 storage/rocksdb/rocksdb/tools/rdb/.gitignore create mode 100644 storage/rocksdb/rocksdb/tools/rdb/API.md create mode 100644 storage/rocksdb/rocksdb/tools/rdb/README.md create mode 100644 storage/rocksdb/rocksdb/tools/rdb/binding.gyp create mode 100644 storage/rocksdb/rocksdb/tools/rdb/db_wrapper.cc create mode 100644 storage/rocksdb/rocksdb/tools/rdb/db_wrapper.h create mode 100755 storage/rocksdb/rocksdb/tools/rdb/rdb create mode 100644 storage/rocksdb/rocksdb/tools/rdb/rdb.cc create mode 100644 storage/rocksdb/rocksdb/tools/rdb/unit_test.js create mode 100644 storage/rocksdb/rocksdb/tools/reduce_levels_test.cc create mode 100755 storage/rocksdb/rocksdb/tools/regression_test.sh create mode 100755 
storage/rocksdb/rocksdb/tools/report_lite_binary_size.sh create mode 100755 storage/rocksdb/rocksdb/tools/rocksdb_dump_test.sh create mode 100755 storage/rocksdb/rocksdb/tools/run_flash_bench.sh create mode 100755 storage/rocksdb/rocksdb/tools/run_leveldb.sh create mode 100644 storage/rocksdb/rocksdb/tools/sample-dump.dmp create mode 100644 storage/rocksdb/rocksdb/tools/sst_dump.cc create mode 100644 storage/rocksdb/rocksdb/tools/sst_dump_test.cc create mode 100644 storage/rocksdb/rocksdb/tools/sst_dump_tool.cc create mode 100644 storage/rocksdb/rocksdb/tools/sst_dump_tool_imp.h create mode 100644 storage/rocksdb/rocksdb/tools/trace_analyzer.cc create mode 100644 storage/rocksdb/rocksdb/tools/trace_analyzer_test.cc create mode 100644 storage/rocksdb/rocksdb/tools/trace_analyzer_tool.cc create mode 100644 storage/rocksdb/rocksdb/tools/trace_analyzer_tool.h create mode 100755 storage/rocksdb/rocksdb/tools/verify_random_db.sh create mode 100755 storage/rocksdb/rocksdb/tools/write_external_sst.sh create mode 100644 storage/rocksdb/rocksdb/tools/write_stress.cc create mode 100644 storage/rocksdb/rocksdb/tools/write_stress_runner.py create mode 100644 storage/rocksdb/rocksdb/trace_replay/block_cache_tracer.cc create mode 100644 storage/rocksdb/rocksdb/trace_replay/block_cache_tracer.h create mode 100644 storage/rocksdb/rocksdb/trace_replay/block_cache_tracer_test.cc create mode 100644 storage/rocksdb/rocksdb/trace_replay/trace_replay.cc create mode 100644 storage/rocksdb/rocksdb/trace_replay/trace_replay.h create mode 100644 storage/rocksdb/rocksdb/util/aligned_buffer.h create mode 100644 storage/rocksdb/rocksdb/util/autovector.h create mode 100644 storage/rocksdb/rocksdb/util/autovector_test.cc create mode 100644 storage/rocksdb/rocksdb/util/bloom_impl.h create mode 100644 storage/rocksdb/rocksdb/util/bloom_test.cc create mode 100644 storage/rocksdb/rocksdb/util/build_version.cc.in create mode 100644 storage/rocksdb/rocksdb/util/build_version.h create mode 100644 
storage/rocksdb/rocksdb/util/cast_util.h create mode 100644 storage/rocksdb/rocksdb/util/channel.h create mode 100644 storage/rocksdb/rocksdb/util/coding.cc create mode 100644 storage/rocksdb/rocksdb/util/coding.h create mode 100644 storage/rocksdb/rocksdb/util/coding_test.cc create mode 100644 storage/rocksdb/rocksdb/util/compaction_job_stats_impl.cc create mode 100644 storage/rocksdb/rocksdb/util/comparator.cc create mode 100644 storage/rocksdb/rocksdb/util/compression.h create mode 100644 storage/rocksdb/rocksdb/util/compression_context_cache.cc create mode 100644 storage/rocksdb/rocksdb/util/compression_context_cache.h create mode 100644 storage/rocksdb/rocksdb/util/concurrent_task_limiter_impl.cc create mode 100644 storage/rocksdb/rocksdb/util/concurrent_task_limiter_impl.h create mode 100644 storage/rocksdb/rocksdb/util/core_local.h create mode 100644 storage/rocksdb/rocksdb/util/crc32c.cc create mode 100644 storage/rocksdb/rocksdb/util/crc32c.h create mode 100644 storage/rocksdb/rocksdb/util/crc32c_arm64.cc create mode 100644 storage/rocksdb/rocksdb/util/crc32c_arm64.h create mode 100644 storage/rocksdb/rocksdb/util/crc32c_ppc.c create mode 100644 storage/rocksdb/rocksdb/util/crc32c_ppc.h create mode 100644 storage/rocksdb/rocksdb/util/crc32c_ppc_asm.S create mode 100644 storage/rocksdb/rocksdb/util/crc32c_ppc_constants.h create mode 100644 storage/rocksdb/rocksdb/util/crc32c_test.cc create mode 100644 storage/rocksdb/rocksdb/util/defer.h create mode 100644 storage/rocksdb/rocksdb/util/defer_test.cc create mode 100644 storage/rocksdb/rocksdb/util/duplicate_detector.h create mode 100644 storage/rocksdb/rocksdb/util/dynamic_bloom.cc create mode 100644 storage/rocksdb/rocksdb/util/dynamic_bloom.h create mode 100644 storage/rocksdb/rocksdb/util/dynamic_bloom_test.cc create mode 100644 storage/rocksdb/rocksdb/util/file_checksum_helper.cc create mode 100644 storage/rocksdb/rocksdb/util/file_checksum_helper.h create mode 100644 
storage/rocksdb/rocksdb/util/file_reader_writer_test.cc create mode 100644 storage/rocksdb/rocksdb/util/filelock_test.cc create mode 100644 storage/rocksdb/rocksdb/util/filter_bench.cc create mode 100644 storage/rocksdb/rocksdb/util/gflags_compat.h create mode 100644 storage/rocksdb/rocksdb/util/hash.cc create mode 100644 storage/rocksdb/rocksdb/util/hash.h create mode 100644 storage/rocksdb/rocksdb/util/hash_map.h create mode 100644 storage/rocksdb/rocksdb/util/hash_test.cc create mode 100644 storage/rocksdb/rocksdb/util/heap.h create mode 100644 storage/rocksdb/rocksdb/util/heap_test.cc create mode 100644 storage/rocksdb/rocksdb/util/kv_map.h create mode 100644 storage/rocksdb/rocksdb/util/log_write_bench.cc create mode 100644 storage/rocksdb/rocksdb/util/murmurhash.cc create mode 100644 storage/rocksdb/rocksdb/util/murmurhash.h create mode 100644 storage/rocksdb/rocksdb/util/mutexlock.h create mode 100644 storage/rocksdb/rocksdb/util/ppc-opcode.h create mode 100644 storage/rocksdb/rocksdb/util/random.cc create mode 100644 storage/rocksdb/rocksdb/util/random.h create mode 100644 storage/rocksdb/rocksdb/util/random_test.cc create mode 100644 storage/rocksdb/rocksdb/util/rate_limiter.cc create mode 100644 storage/rocksdb/rocksdb/util/rate_limiter.h create mode 100644 storage/rocksdb/rocksdb/util/rate_limiter_test.cc create mode 100644 storage/rocksdb/rocksdb/util/repeatable_thread.h create mode 100644 storage/rocksdb/rocksdb/util/repeatable_thread_test.cc create mode 100644 storage/rocksdb/rocksdb/util/set_comparator.h create mode 100644 storage/rocksdb/rocksdb/util/slice.cc create mode 100644 storage/rocksdb/rocksdb/util/slice_test.cc create mode 100644 storage/rocksdb/rocksdb/util/slice_transform_test.cc create mode 100644 storage/rocksdb/rocksdb/util/status.cc create mode 100644 storage/rocksdb/rocksdb/util/stderr_logger.h create mode 100644 storage/rocksdb/rocksdb/util/stop_watch.h create mode 100644 storage/rocksdb/rocksdb/util/string_util.cc create mode 
100644 storage/rocksdb/rocksdb/util/string_util.h create mode 100644 storage/rocksdb/rocksdb/util/thread_list_test.cc create mode 100644 storage/rocksdb/rocksdb/util/thread_local.cc create mode 100644 storage/rocksdb/rocksdb/util/thread_local.h create mode 100644 storage/rocksdb/rocksdb/util/thread_local_test.cc create mode 100644 storage/rocksdb/rocksdb/util/thread_operation.h create mode 100644 storage/rocksdb/rocksdb/util/threadpool_imp.cc create mode 100644 storage/rocksdb/rocksdb/util/threadpool_imp.h create mode 100644 storage/rocksdb/rocksdb/util/timer_queue.h create mode 100644 storage/rocksdb/rocksdb/util/timer_queue_test.cc create mode 100644 storage/rocksdb/rocksdb/util/user_comparator_wrapper.h create mode 100644 storage/rocksdb/rocksdb/util/util.h create mode 100644 storage/rocksdb/rocksdb/util/vector_iterator.h create mode 100644 storage/rocksdb/rocksdb/util/xxh3p.h create mode 100644 storage/rocksdb/rocksdb/util/xxhash.cc create mode 100644 storage/rocksdb/rocksdb/util/xxhash.h create mode 100644 storage/rocksdb/rocksdb/utilities/backupable/backupable_db.cc create mode 100644 storage/rocksdb/rocksdb/utilities/backupable/backupable_db_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.cc create mode 100644 storage/rocksdb/rocksdb/utilities/blob_db/blob_compaction_filter.h create mode 100644 storage/rocksdb/rocksdb/utilities/blob_db/blob_db.cc create mode 100644 storage/rocksdb/rocksdb/utilities/blob_db/blob_db.h create mode 100644 storage/rocksdb/rocksdb/utilities/blob_db/blob_db_gc_stats.h create mode 100644 storage/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.cc create mode 100644 storage/rocksdb/rocksdb/utilities/blob_db/blob_db_impl.h create mode 100644 storage/rocksdb/rocksdb/utilities/blob_db/blob_db_impl_filesnapshot.cc create mode 100644 storage/rocksdb/rocksdb/utilities/blob_db/blob_db_iterator.h create mode 100644 storage/rocksdb/rocksdb/utilities/blob_db/blob_db_listener.h create mode 100644 
storage/rocksdb/rocksdb/utilities/blob_db/blob_db_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.cc create mode 100644 storage/rocksdb/rocksdb/utilities/blob_db/blob_dump_tool.h create mode 100644 storage/rocksdb/rocksdb/utilities/blob_db/blob_file.cc create mode 100644 storage/rocksdb/rocksdb/utilities/blob_db/blob_file.h create mode 100644 storage/rocksdb/rocksdb/utilities/blob_db/blob_log_format.cc create mode 100644 storage/rocksdb/rocksdb/utilities/blob_db/blob_log_format.h create mode 100644 storage/rocksdb/rocksdb/utilities/blob_db/blob_log_reader.cc create mode 100644 storage/rocksdb/rocksdb/utilities/blob_db/blob_log_reader.h create mode 100644 storage/rocksdb/rocksdb/utilities/blob_db/blob_log_writer.cc create mode 100644 storage/rocksdb/rocksdb/utilities/blob_db/blob_log_writer.h create mode 100644 storage/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.cc create mode 100644 storage/rocksdb/rocksdb/utilities/cassandra/cassandra_compaction_filter.h create mode 100644 storage/rocksdb/rocksdb/utilities/cassandra/cassandra_format_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/cassandra/cassandra_functional_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/cassandra/cassandra_row_merge_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/cassandra/cassandra_serialize_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/cassandra/format.cc create mode 100644 storage/rocksdb/rocksdb/utilities/cassandra/format.h create mode 100644 storage/rocksdb/rocksdb/utilities/cassandra/merge_operator.cc create mode 100644 storage/rocksdb/rocksdb/utilities/cassandra/merge_operator.h create mode 100644 storage/rocksdb/rocksdb/utilities/cassandra/serialize.h create mode 100644 storage/rocksdb/rocksdb/utilities/cassandra/test_utils.cc create mode 100644 storage/rocksdb/rocksdb/utilities/cassandra/test_utils.h create mode 100644 
storage/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.cc create mode 100644 storage/rocksdb/rocksdb/utilities/checkpoint/checkpoint_impl.h create mode 100644 storage/rocksdb/rocksdb/utilities/checkpoint/checkpoint_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc create mode 100644 storage/rocksdb/rocksdb/utilities/compaction_filters/remove_emptyvalue_compactionfilter.h create mode 100644 storage/rocksdb/rocksdb/utilities/convenience/info_log_finder.cc create mode 100644 storage/rocksdb/rocksdb/utilities/debug.cc create mode 100644 storage/rocksdb/rocksdb/utilities/env_librados.cc create mode 100644 storage/rocksdb/rocksdb/utilities/env_librados.md create mode 100644 storage/rocksdb/rocksdb/utilities/env_librados_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/env_mirror.cc create mode 100644 storage/rocksdb/rocksdb/utilities/env_mirror_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/env_timed.cc create mode 100644 storage/rocksdb/rocksdb/utilities/env_timed_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/leveldb_options/leveldb_options.cc create mode 100644 storage/rocksdb/rocksdb/utilities/memory/memory_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/memory/memory_util.cc create mode 100644 storage/rocksdb/rocksdb/utilities/merge_operators.h create mode 100644 storage/rocksdb/rocksdb/utilities/merge_operators/bytesxor.cc create mode 100644 storage/rocksdb/rocksdb/utilities/merge_operators/bytesxor.h create mode 100644 storage/rocksdb/rocksdb/utilities/merge_operators/max.cc create mode 100644 storage/rocksdb/rocksdb/utilities/merge_operators/put.cc create mode 100644 storage/rocksdb/rocksdb/utilities/merge_operators/sortlist.cc create mode 100644 storage/rocksdb/rocksdb/utilities/merge_operators/sortlist.h create mode 100644 storage/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.cc create mode 100644 
storage/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend.h create mode 100644 storage/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.cc create mode 100644 storage/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend2.h create mode 100644 storage/rocksdb/rocksdb/utilities/merge_operators/string_append/stringappend_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/merge_operators/uint64add.cc create mode 100644 storage/rocksdb/rocksdb/utilities/object_registry.cc create mode 100644 storage/rocksdb/rocksdb/utilities/object_registry_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration.cc create mode 100644 storage/rocksdb/rocksdb/utilities/option_change_migration/option_change_migration_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/options/options_util.cc create mode 100644 storage/rocksdb/rocksdb/utilities/options/options_util_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.cc create mode 100644 storage/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier.h create mode 100644 storage/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.cc create mode 100644 storage/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file.h create mode 100644 storage/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_file_buffer.h create mode 100644 storage/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.cc create mode 100644 storage/rocksdb/rocksdb/utilities/persistent_cache/block_cache_tier_metadata.h create mode 100644 storage/rocksdb/rocksdb/utilities/persistent_cache/hash_table.h create mode 100644 storage/rocksdb/rocksdb/utilities/persistent_cache/hash_table_bench.cc create mode 100644 storage/rocksdb/rocksdb/utilities/persistent_cache/hash_table_evictable.h create mode 100644 
storage/rocksdb/rocksdb/utilities/persistent_cache/hash_table_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/persistent_cache/lrulist.h create mode 100644 storage/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_bench.cc create mode 100644 storage/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_test.h create mode 100644 storage/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.cc create mode 100644 storage/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_tier.h create mode 100644 storage/rocksdb/rocksdb/utilities/persistent_cache/persistent_cache_util.h create mode 100644 storage/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.cc create mode 100644 storage/rocksdb/rocksdb/utilities/persistent_cache/volatile_tier_impl.h create mode 100644 storage/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.cc create mode 100644 storage/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator.h create mode 100644 storage/rocksdb/rocksdb/utilities/simulator_cache/cache_simulator_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/simulator_cache/sim_cache.cc create mode 100644 storage/rocksdb/rocksdb/utilities/simulator_cache/sim_cache_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.cc create mode 100644 storage/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector.h create mode 100644 storage/rocksdb/rocksdb/utilities/table_properties_collectors/compact_on_deletion_collector_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.cc create mode 100644 storage/rocksdb/rocksdb/utilities/trace/file_trace_reader_writer.h create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.cc create mode 100644 
storage/rocksdb/rocksdb/utilities/transactions/optimistic_transaction.h create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.cc create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_db_impl.h create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/optimistic_transaction_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.cc create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction.h create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.cc create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/pessimistic_transaction_db.h create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/snapshot_checker.cc create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/transaction_base.cc create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/transaction_base.h create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.cc create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/transaction_db_mutex_impl.h create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/transaction_lock_mgr.cc create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/transaction_lock_mgr.h create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/transaction_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/transaction_test.h create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/transaction_util.cc create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/transaction_util.h create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/write_prepared_transaction_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.cc create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/write_prepared_txn.h create mode 
100644 storage/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.cc create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/write_prepared_txn_db.h create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/write_unprepared_transaction_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.cc create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn.h create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.cc create mode 100644 storage/rocksdb/rocksdb/utilities/transactions/write_unprepared_txn_db.h create mode 100644 storage/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.cc create mode 100644 storage/rocksdb/rocksdb/utilities/ttl/db_ttl_impl.h create mode 100644 storage/rocksdb/rocksdb/utilities/ttl/ttl_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/util_merge_operators_test.cc create mode 100644 storage/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index.cc create mode 100644 storage/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.cc create mode 100644 storage/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_internal.h create mode 100644 storage/rocksdb/rocksdb/utilities/write_batch_with_index/write_batch_with_index_test.cc create mode 100644 storage/rocksdb/tools/mysql_ldb.cc create mode 100644 storage/rocksdb/unittest/CMakeLists.txt create mode 100644 storage/rocksdb/unittest/test_properties_collector.cc create mode 100644 storage/rocksdb/ut0counter.h (limited to 'storage/rocksdb') diff --git a/storage/rocksdb/.clang-format b/storage/rocksdb/.clang-format new file mode 100644 index 00000000..b1df76bd --- /dev/null +++ b/storage/rocksdb/.clang-format @@ -0,0 +1,137 @@ +# Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. 
+# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License, version 2.0, +# as published by the Free Software Foundation. +# +# This program is also distributed with certain software (including +# but not limited to OpenSSL) that is licensed under separate terms, +# as designated in a particular file or component or in included license +# documentation. The authors of MySQL hereby grant you an additional +# permission to link the program and your derivative works with the +# separately licensed software that they have included with MySQL. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License, version 2.0, for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +# This is the output of clang-format-5.0 --style=google --dump-config, +# except for changes mentioned below. We lock the style so that any newer +# version of clang-format will give the same result; as time goes, we may +# update this list, requiring newer versions of clang-format. 
+ +Language: Cpp +# BasedOnStyle: Google +AccessModifierOffset: -1 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlines: Left +AlignOperands: true +AlignTrailingComments: true +AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: true +AllowShortLoopsOnASingleLine: true +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: true +BinPackArguments: true +BinPackParameters: true +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false + SplitEmptyFunction: true + SplitEmptyRecord: true + SplitEmptyNamespace: true +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Attach +BreakBeforeInheritanceComma: false +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakConstructorInitializers: BeforeColon +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +FixNamespaceComments: true +ForEachMacros: + - foreach + - Q_FOREACH + - BOOST_FOREACH +IncludeCategories: + - Regex: '^<.*\.h>' + Priority: 1 + - Regex: '^<.*' + Priority: 2 + - Regex: '.*' + Priority: 3 +IncludeIsMainRegex: '([-_](test|unittest))?$' +IndentCaseLabels: true +IndentWidth: 2 +IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave 
+JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: false +PenaltyBreakAssignment: 2 +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +ReflowComments: true +SortIncludes: true +SortUsingDeclarations: true +SpaceAfterCStyleCast: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +TabWidth: 8 +UseTab: Never + +# We declare one specific pointer style since right alignment is dominant in +# the MySQL code base (default --style=google has DerivePointerAlignment true). +DerivePointerAlignment: false +PointerAlignment: Right + +# MySQL source code is allowed to use C++11 features. +Standard: Cpp11 diff --git a/storage/rocksdb/.gitignore b/storage/rocksdb/.gitignore new file mode 100644 index 00000000..adf3e154 --- /dev/null +++ b/storage/rocksdb/.gitignore @@ -0,0 +1,2 @@ +build_version.cc +.* diff --git a/storage/rocksdb/CMakeLists.txt b/storage/rocksdb/CMakeLists.txt new file mode 100644 index 00000000..d5bbefbd --- /dev/null +++ b/storage/rocksdb/CMakeLists.txt @@ -0,0 +1,276 @@ +# TODO: Copyrights + +MACRO(SKIP_ROCKSDB_PLUGIN msg) + MESSAGE_ONCE(SKIP_ROCKSDB_PLUGIN "Can't build rocksdb engine - ${msg}") + ADD_FEATURE_INFO(ROCKSDB "OFF" "Storage Engine") + RETURN() +ENDMACRO() + +IF (NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/rocksdb/CMakeLists.txt") + SKIP_ROCKSDB_PLUGIN("Missing CMakeLists.txt in rocksdb directory. 
Try \"git submodule update\".") +ENDIF() + +CHECK_LIBRARY_EXISTS(rt timer_delete "" HAVE_TIMER_DELETE) +IF (HAVE_TIMER_DELETE) + ADD_DEFINITIONS(-DHAVE_TIMER_DELETE) +ENDIF(HAVE_TIMER_DELETE) + +CHECK_FUNCTION_EXISTS(sched_getcpu HAVE_SCHED_GETCPU) +IF(HAVE_SCHED_GETCPU) + ADD_DEFINITIONS(-DHAVE_SCHED_GETCPU=1 -DROCKSDB_SCHED_GETCPU_PRESENT) +ENDIF() + +IF(WITH_VALGRIND) + ADD_DEFINITIONS(-DROCKSDB_VALGRIND_RUN=1) +ENDIF() + +# We've had our builders hang during the build process. This prevents MariaRocks +# to be built on 32 bit intel OS kernels. +IF(CMAKE_SYSTEM_PROCESSOR MATCHES "i[36]86") + SKIP_ROCKSDB_PLUGIN("Intel 32 bit not supported.") +ENDIF() + +# Due to retrieved data being incorrect endian +include(TestBigEndian) +test_big_endian(BIG_ENDIAN) +if(BIG_ENDIAN) + SKIP_ROCKSDB_PLUGIN("Big Endian not supported.") +endif() + +# +# Also, disable building on 32-bit Windows +# +IF (WIN32 AND CMAKE_SIZEOF_VOID_P EQUAL 4) + SKIP_ROCKSDB_PLUGIN("32-Bit Windows are temporarily disabled") +ENDIF() + +# This plugin needs recent C++ compilers (it is using C++11 features) +# Skip build for the old compilers +SET(CXX11_FLAGS) +SET(OLD_COMPILER_MSG "requires c++11 -capable compiler (minimal supported versions are g++ 4.8, clang 3.3, VS2015)") + +IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU") + EXECUTE_PROCESS(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) + IF (GCC_VERSION VERSION_LESS 4.8) + SKIP_ROCKSDB_PLUGIN("${OLD_COMPILER_MSG}") + ENDIF() + SET(CXX11_FLAGS "-std=c++11") + IF (GCC_VERSION VERSION_LESS 5.0) + SET(CXX11_FLAGS "-std=c++11 -Wno-missing-field-initializers") + ENDIF() +ELSEIF (CMAKE_CXX_COMPILER_ID MATCHES "Clang") + IF ((CMAKE_CXX_COMPILER_VERSION AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 3.3) OR + (CLANG_VERSION_STRING AND CLANG_VERSION_STRING VERSION_LESS 3.3)) + SKIP_ROCKSDB_PLUGIN("${OLD_COMPILER_MSG}") + ENDIF() + SET(CXX11_FLAGS "-std=c++11 -stdlib=libstdc++") + IF(MSVC) + # clang-cl does not work yet + 
SKIP_ROCKSDB_PLUGIN("Clang-cl is not supported")
+  ENDIF()
+ELSEIF(MSVC)
+  IF (MSVC_VERSION LESS 1900)
+    SKIP_ROCKSDB_PLUGIN("${OLD_COMPILER_MSG}")
+  ENDIF()
+ELSE()
+  SKIP_ROCKSDB_PLUGIN("Compiler not supported")
+ENDIF()
+
+IF(NOT CMAKE_VERSION VERSION_LESS 3.1) # CMAKE_CXX_STANDARD exists since CMake 3.1; compare as a version, not as a number
+  SET(CMAKE_CXX_STANDARD 11)
+ELSEIF(CXX11_FLAGS) # older CMake: pass the -std flag collected above by hand
+  ADD_DEFINITIONS(${CXX11_FLAGS})
+ENDIF()
+
+SET(ROCKSDB_SE_SOURCES
+  rdb_mariadb_server_port.cc
+  rdb_mariadb_server_port.h
+  ha_rocksdb.cc
+  ha_rocksdb.h
+  rdb_i_s.cc
+  rdb_i_s.h
+  rdb_io_watchdog.h
+  rdb_io_watchdog.cc
+  rdb_mutex_wrapper.cc
+  rdb_mutex_wrapper.h
+  rdb_index_merge.cc
+  rdb_index_merge.h
+  properties_collector.cc
+  properties_collector.h
+  rdb_datadic.cc
+  rdb_datadic.h
+  rdb_cf_manager.cc
+  rdb_cf_manager.h
+  rdb_utils.cc rdb_utils.h
+  rdb_threads.cc
+  rdb_threads.h
+  rdb_psi.h
+  rdb_psi.cc
+  rdb_sst_info.cc
+  rdb_sst_info.h
+  rdb_converter.cc
+  rdb_converter.h
+)
+
+# MariaDB: the following is added in build_rocksdb.cmake, when appropriate:
+# This is a strong requirement coming from RocksDB. No conditional checks here.
+#ADD_DEFINITIONS(-DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX
+#)
+if(CMAKE_SYSTEM_PROCESSOR STREQUAL "riscv64")
+  SET(ATOMIC_EXTRA_LIBS -latomic)
+else()
+  SET(ATOMIC_EXTRA_LIBS)
+endif()
+
+MYSQL_ADD_PLUGIN(rocksdb ${ROCKSDB_SE_SOURCES} MODULE_ONLY STORAGE_ENGINE
+  MODULE_OUTPUT_NAME ha_rocksdb
+  LINK_LIBRARIES ${ATOMIC_EXTRA_LIBS}
+  COMPONENT rocksdb-engine)
+
+IF(NOT TARGET rocksdb)
+  # Bail out if compilation with rocksdb engine is not requested
+  RETURN()
+ENDIF()
+
+
+
+CHECK_CXX_SOURCE_COMPILES("
+#if defined(_MSC_VER) && !defined(__thread)
+#define __thread __declspec(thread)
+#endif
+int main() {
+  static __thread int tls;
+  tls=0;
+  return tls;
+}
+" HAVE_THREAD_LOCAL)
+if(HAVE_THREAD_LOCAL)
+  ADD_DEFINITIONS(-DROCKSDB_SUPPORT_THREAD_LOCAL)
+else()
+  MESSAGE(SEND_ERROR "The compiler failed the check for ROCKSDB_SUPPORT_THREAD_LOCAL. 
" + "MyRocks requires that feature.") +endif() + +INCLUDE(build_rocksdb.cmake) + +ADD_CONVENIENCE_LIBRARY(rocksdb_aux_lib + ha_rocksdb_proto.h + logger.h + rdb_comparator.h + rdb_cf_options.cc + rdb_cf_options.h + event_listener.cc + event_listener.h + rdb_perf_context.cc + rdb_perf_context.h + rdb_buff.h + rdb_mariadb_port.h + nosql_access.cc nosql_access.h +) + +ADD_DEPENDENCIES(rocksdb_aux_lib GenError) + +# MARIAROCKS-TODO: how to properly depend on -lrt ? +TARGET_LINK_LIBRARIES(rocksdb_aux_lib rocksdblib ${ZLIB_LIBRARY}) +if (UNIX AND NOT APPLE) + TARGET_LINK_LIBRARIES(rocksdb_aux_lib -lrt) +endif() +TARGET_LINK_LIBRARIES(rocksdb_aux_lib ${ATOMIC_EXTRA_LIBS}) + +# IF (WITH_JEMALLOC) +# FIND_LIBRARY(JEMALLOC_LIBRARY +# NAMES libjemalloc${PIC_EXT}.a jemalloc +# HINTS ${WITH_JEMALLOC}/lib) +# SET(rocksdb_static_libs ${rocksdb_static_libs} +# ${JEMALLOC_LIBRARY}) +# ADD_DEFINITIONS(-DROCKSDB_JEMALLOC) +# ADD_DEFINITIONS(-DROCKSDB_MALLOC_USABLE_SIZE) +# ENDIF() + +# MariaDB: Q: why does the upstream add libunwind for a particular +# storage engine? 
+#IF (WITH_UNWIND) +# FIND_LIBRARY(UNWIND_LIBRARY +# NAMES libunwind${PIC_EXT}.a unwind +# HINTS ${WITH_UNWIND}/lib) +# SET(rocksdb_static_libs ${rocksdb_static_libs} +# ${UNWIND_LIBRARY}) +#ENDIF() + + +TARGET_LINK_LIBRARIES(rocksdb rocksdb_aux_lib) + +CHECK_FUNCTION_EXISTS(sched_getcpu HAVE_SCHED_GETCPU) +IF(HAVE_SCHED_GETCPU) + ADD_DEFINITIONS(-DHAVE_SCHED_GETCPU=1) +# MariaDB: don't do this: +# ADD_DEFINITIONS(-DZSTD_STATIC_LINKING_ONLY) +ENDIF() + +IF (WITH_TBB) + FIND_LIBRARY(TBB_LIBRARY + NAMES libtbb${PIC_EXT}.a tbb + HINTS ${WITH_TBB}/lib) + SET(rocksdb_static_libs ${rocksdb_static_libs} + ${TBB_LIBRARY}) + ADD_DEFINITIONS(-DTBB) +ENDIF() + +# +# MariaDB: Dynamic plugin build is not suitable with unittest ATM +# +#IF(WITH_UNIT_TESTS AND WITH_EMBEDDED_SERVER) +# ADD_SUBDIRECTORY(unittest) +#ENDIF() + +if (UNIX AND NOT APPLE) + SET(rocksdb_static_libs ${rocksdb_static_libs} "-lrt") +endif() + + +ADD_LIBRARY(rocksdb_tools STATIC + rocksdb/tools/ldb_tool.cc + rocksdb/tools/ldb_cmd.cc + rocksdb/tools/sst_dump_tool.cc +) + +MYSQL_ADD_EXECUTABLE(sst_dump rocksdb/tools/sst_dump.cc COMPONENT rocksdb-engine) +TARGET_LINK_LIBRARIES(sst_dump rocksdblib) + +MYSQL_ADD_EXECUTABLE(mariadb-ldb tools/mysql_ldb.cc COMPONENT rocksdb-engine) +TARGET_LINK_LIBRARIES(mariadb-ldb rocksdb_tools rocksdb_aux_lib dbug) + +CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/myrocks_hotbackup.py + ${CMAKE_CURRENT_BINARY_DIR}/myrocks_hotbackup @ONLY) +INSTALL_SCRIPT(${CMAKE_CURRENT_BINARY_DIR}/myrocks_hotbackup COMPONENT rocksdb-engine) + +IF(MSVC) + # RocksDB, the storage engine, overdoes "const" by adding + # additional const qualifiers to parameters of the overriden virtual functions + # This creates a lot of warnings, that we silence here. + ADD_DEFINITIONS(/wd4373) + # Some checks in C++ runtime that make debug build much slower + ADD_DEFINITIONS(-D_ITERATOR_DEBUG_LEVEL=0) + + # Temporarily disable "conversion from size_t .." 
warnings + IF(CMAKE_SIZEOF_VOID_P EQUAL 8) + SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /wd4267") + ENDIF() +ELSEIF(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") + SET_TARGET_PROPERTIES(rocksdb_tools sst_dump mariadb-ldb PROPERTIES COMPILE_FLAGS "-Wno-error") +ENDIF() + +IF(GIT_EXECUTABLE AND EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/rocksdb/.git) + EXECUTE_PROCESS( + COMMAND ${GIT_EXECUTABLE} rev-parse HEAD + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/rocksdb + OUTPUT_VARIABLE OUT RESULT_VARIABLE RES) + IF(RES EQUAL 0) + STRING(REGEX REPLACE "\n$" "" ROCKSDB_GIT_HASH "${OUT}") + ENDIF() +ENDIF() +IF(ROCKSDB_GIT_HASH OR + (NOT EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/rdb_source_revision.h)) + CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/rdb_source_revision.h.in + ${CMAKE_CURRENT_BINARY_DIR}/rdb_source_revision.h ) +ENDIF() diff --git a/storage/rocksdb/README b/storage/rocksdb/README new file mode 100644 index 00000000..3af45592 --- /dev/null +++ b/storage/rocksdb/README @@ -0,0 +1,50 @@ +== Summary == +This directory contains RocksDB-based Storage Engine (RDBSE) for MySQL, +also known as "MyRocks". + +== Resources == +https://github.com/facebook/mysql-5.6/wiki/Getting-Started-with-MyRocks +https://www.facebook.com/groups/MyRocks/ + +== Coding Conventions == +The baseline for MyRocks coding conventions for the code in storage/rocksdb/ +is based on the default clang format with a few minor changes. The file +storage/rocksdb/.clang-format describes conventions and can be integrated +with Vim or Emacs as described here: +http://releases.llvm.org/3.6.0/tools/clang/docs/ClangFormat.html#vim-integration + +All code outside of storage/rocksdb/ should conform to the MySQL coding +conventions: +http://dev.mysql.com/doc/internals/en/coding-guidelines.html. + +Several refinements: + 0. There is an umbrella C++ namespace named "myrocks" for all MyRocks code. + 1. We introduced "RDB" as the super-short abbreviation for "RocksDB". 
We will + use it as a name prefix, with different capitalization (see below), to ease + up code navigation with ctags and grep. + N.B. For ease of matching, we'll keep the variables and functions dealing + with sysvars as close as possible to the outside visible names of + sysvars, which start with "rocksdb_" prefix, the outward storage + engine name. + 2. The names for classes, interfaces, and C++ structures (which act as + classes), start with prefix "Rdb_". + NB: For historical reasons, we'll keep the "ha_" class + name for ha_rocksdb class, which is an exception to the rule. + 3. The names for global objects and functions start with prefix "rdb_". + 4. The names for macros and constants start with prefix "RDB_". + 5. Regular class member names start with "m_". + 6. Static class member names start with "s_". + 7. Given the 80 character per line limit, we'll not always use full English + words in names, when a well known or easily recognizable abbreviation + exists (like "tx" for "transaction" or "param" for "parameter" etc). + 8. When needing to disambiguate, we use different suffixes for that, like + "_arg" for a function argument/parameter, "_arr" for a C style array, and + "_vect" for a std::vector etc. + +== Running Tests == +To run tests from rocksdb, rocksdb_rpl or other rocksdb_* packages, use the +following parameters: + --default-storage-engine=rocksdb + --skip-innodb + --default-tmp-storage-engine=MyISAM + --rocksdb diff --git a/storage/rocksdb/atomic_stat.h b/storage/rocksdb/atomic_stat.h new file mode 100644 index 00000000..04e59bd9 --- /dev/null +++ b/storage/rocksdb/atomic_stat.h @@ -0,0 +1,94 @@ +/* This is an atomic integer abstract data type, for high-performance + tracking of a single stat. It intentionally permits inconsistent + atomic operations and reads, for better performance. This means + that, though no data should ever be lost by this stat, reads of it + at any time may not include all changes up to any particular point. 
+
+   So, values read from these may only be approximately correct.
+
+   If your use-case will fail under these conditions, do not use this.
+
+   Copyright (C) 2012 - 2014 Steaphan Greene <steaphan@gmail.com>
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; either version 2
+   of the License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the
+   Free Software Foundation, Inc.
+   51 Franklin Street, Fifth Floor
+   Boston, MA  02110-1301, USA.
+*/
+
+#ifndef _atomic_stat_h_
+#define _atomic_stat_h_
+
+#include <atomic>
+
+template < typename TYPE >
+class atomic_stat {
+public:
+  // Initialize value to the default for the type
+  atomic_stat() : value_(TYPE()) {};
+
+  // This enforces a strict order, as all absolute sets should
+  void clear() {
+    value_.store(TYPE(), std::memory_order_seq_cst);
+  };
+
+  // Reads can get any valid value, it doesn't matter which, exactly
+  TYPE load() const {
+    return value_.load(std::memory_order_relaxed);
+  };
+
+  // This only supplies relative arithmetic operations
+  // These are all done atomically, and so can show up in any order
+  void inc(const TYPE &other) {
+    value_.fetch_add(other, std::memory_order_relaxed);
+  };
+
+  void dec(const TYPE &other) {
+    value_.fetch_sub(other, std::memory_order_relaxed);
+  };
+
+  void inc() {
+    value_.fetch_add(1, std::memory_order_relaxed);
+  };
+
+  void dec() {
+    value_.fetch_sub(1, std::memory_order_relaxed);
+  };
+
+  // This will make one attempt to set the value to the max of
+  // the current value, and the passed-in value.  It can fail
+  // for any reason, and we only try it once.
+  void set_max_maybe(const TYPE &new_val) {
+    TYPE old_val = value_;
+    if (new_val > old_val) {
+      value_.compare_exchange_weak(old_val, new_val,
+                                   std::memory_order_relaxed,
+                                   std::memory_order_relaxed);
+    }
+  };
+
+  // This will make one attempt to assign the value to the passed-in
+  // value.  It can fail for any reason, and we only try it once.
+  void set_maybe(const TYPE &new_val) {
+    TYPE old_val = value_;
+    value_.compare_exchange_weak(old_val, new_val,
+                                 std::memory_order_relaxed,
+                                 std::memory_order_relaxed);
+  };
+
+private:
+  std::atomic<TYPE> value_;  // the single tracked statistic
+};
+
+#endif // _atomic_stat_h_
diff --git a/storage/rocksdb/build_rocksdb.cmake b/storage/rocksdb/build_rocksdb.cmake
new file mode 100644
index 00000000..762368b4
--- /dev/null
+++ b/storage/rocksdb/build_rocksdb.cmake
@@ -0,0 +1,506 @@
+
+if(POLICY CMP0042)
+  cmake_policy(SET CMP0042 NEW)
+endif()
+
+SET(ROCKSDB_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/rocksdb)
+
+INCLUDE_DIRECTORIES(
+  ${CMAKE_CURRENT_BINARY_DIR}
+  ${ROCKSDB_SOURCE_DIR}
+  ${ROCKSDB_SOURCE_DIR}/include
+  ${ROCKSDB_SOURCE_DIR}/third-party/gtest-1.7.0/fused-src
+)
+
+list(APPEND CMAKE_MODULE_PATH "${ROCKSDB_SOURCE_DIR}/cmake/modules/")
+
+if(WIN32)
+  # include(${ROCKSDB_SOURCE_DIR}/thirdparty.inc)
+else()
+  option(WITH_ROCKSDB_JEMALLOC "build RocksDB with JeMalloc" OFF)
+  if(CMAKE_SYSTEM_NAME STREQUAL "FreeBSD")
+    # FreeBSD has jemalloc as default malloc
+    add_definitions(-DROCKSDB_JEMALLOC)
+    ADD_DEFINITIONS(-DROCKSDB_MALLOC_USABLE_SIZE)
+    set(WITH_JEMALLOC ON)
+  elseif(WITH_ROCKSDB_JEMALLOC)
+    find_package(JeMalloc REQUIRED)
+    add_definitions(-DROCKSDB_JEMALLOC)
+    ADD_DEFINITIONS(-DROCKSDB_MALLOC_USABLE_SIZE)
+    include_directories(${JEMALLOC_INCLUDE_DIR})
+  endif()
+endif()
+
+
+# Optional compression libraries.
+ +include(CheckFunctionExists) +macro(check_lib package var) + STRING(TOUPPER ${package} PACKAGE_NAME) + SET(WITH_ROCKSDB_${package} AUTO CACHE STRING + "Build RocksDB with ${package} compression. Possible values are 'ON', 'OFF', 'AUTO' and default is 'AUTO'") + + IF (NOT ${WITH_ROCKSDB_${package}} STREQUAL "OFF") + FIND_PACKAGE(${package} QUIET) + SET(HAVE_ROCKSDB_${PACKAGE_NAME} TRUE) + IF (${${PACKAGE_NAME}_FOUND}) + IF(${ARGC} GREATER 2) + SET(CMAKE_REQUIRED_LIBRARIES ${${var}_LIBRARIES}) + CHECK_FUNCTION_EXISTS(${ARGV2} ${var}_VALID) + UNSET(CMAKE_REQUIRED_LIBRARIES) + ELSE() + SET(${var}_VALID TRUE) + ENDIF() + ENDIF() + ENDIF() + ADD_FEATURE_INFO(ROCKSDB_${PACKAGE_NAME} HAVE_ROCKSDB_${PACKAGE_NAME} "${package} Compression in the RocksDB storage engine") + + IF(${${var}_VALID}) + MESSAGE_ONCE(rocksdb_${var} "Found ${package}: ${${var}_LIBRARIES}") + add_definitions(-D${PACKAGE_NAME}) + include_directories(${${var}_INCLUDE_DIR}) + list(APPEND THIRDPARTY_LIBS ${${var}_LIBRARIES}) + ELSEIF(${${PACKAGE_NAME}_FOUND}) + MESSAGE_ONCE(rocksdb_${var} "Found unusable ${package}: ${${var}_LIBRARIES} [${ARGV2}]") + ELSE() + MESSAGE_ONCE(rocksdb_${var} "Could NOT find ${package}") + ENDIF() + + IF (${WITH_ROCKSDB_${package}} STREQUAL "ON" AND NOT ${${PACKAGE_NAME}_FOUND}) + MESSAGE(FATAL_ERROR + "${package} library was not found, but WITH_ROCKSDB_${package} option is ON.\ + Either set WITH_ROCKSDB_${package} to OFF, or make sure ${package} is installed") + endif() +endmacro() + +check_lib(LZ4 LZ4) +check_lib(BZip2 BZIP2) +check_lib(snappy snappy) # rocksdb/cmake/modules/Findsnappy.cmake violates the convention +check_lib(ZSTD ZSTD ZDICT_trainFromBuffer) + +add_definitions(-DZLIB) +list(APPEND THIRDPARTY_LIBS ${ZLIB_LIBRARY}) +ADD_FEATURE_INFO(ROCKSDB_ZLIB "ON" "zlib Compression in the RocksDB storage engine") + +if(CMAKE_SYSTEM_NAME MATCHES "Cygwin") + add_definitions(-fno-builtin-memcmp -DCYGWIN) +elseif(CMAKE_SYSTEM_NAME MATCHES "Darwin") + add_definitions(-DOS_MACOSX) 
+elseif(CMAKE_SYSTEM_NAME MATCHES "Linux")
+  add_definitions(-DOS_LINUX)
+elseif(CMAKE_SYSTEM_NAME MATCHES "SunOS")
+  add_definitions(-DOS_SOLARIS)
+elseif(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
+  add_definitions(-DOS_FREEBSD)
+elseif(CMAKE_SYSTEM_NAME MATCHES "NetBSD")
+  add_definitions(-DOS_NETBSD)
+elseif(CMAKE_SYSTEM_NAME MATCHES "OpenBSD")
+  add_definitions(-DOS_OPENBSD)
+elseif(CMAKE_SYSTEM_NAME MATCHES "DragonFly")
+  add_definitions(-DOS_DRAGONFLYBSD)
+elseif(CMAKE_SYSTEM_NAME MATCHES "Android")
+  add_definitions(-DOS_ANDROID)
+elseif(CMAKE_SYSTEM_NAME MATCHES "Windows")
+  add_definitions(-DOS_WIN)
+endif()
+
+IF(MSVC)
+  add_definitions(/wd4244)
+ENDIF()
+if(NOT WIN32)
+  add_definitions(-DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX)
+endif()
+
+include(CheckCCompilerFlag)
+# ppc64 or ppc64le or powerpc64 (BSD)
+if(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64|powerpc64")
+  CHECK_C_COMPILER_FLAG("-maltivec" HAS_ALTIVEC)
+  if(HAS_ALTIVEC)
+    message(STATUS " HAS_ALTIVEC yes")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maltivec")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec")
+  endif(HAS_ALTIVEC)
+  if(NOT CMAKE_C_FLAGS MATCHES "m(cpu|tune)")
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mcpu=power8")
+  endif()
+  if(NOT CMAKE_CXX_FLAGS MATCHES "m(cpu|tune)")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mcpu=power8")
+  endif()
+  ADD_DEFINITIONS(-DHAVE_POWER8 -DHAS_ALTIVEC)
+endif(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64|powerpc64")
+
+if(CMAKE_SYSTEM_PROCESSOR STREQUAL "riscv64")
+  set(SYSTEM_LIBS ${SYSTEM_LIBS} -latomic)
+endif()
+
+option(WITH_FALLOCATE "build with fallocate" ON)
+
+if(WITH_FALLOCATE AND UNIX)
+  include(CheckCSourceCompiles)
+  CHECK_C_SOURCE_COMPILES("
+#include <fcntl.h>
+#include <linux/falloc.h>
+int main() {
+ int fd = open(\"/dev/null\", 0);
+ fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, 1024);
+}
+" HAVE_FALLOCATE)
+  if(HAVE_FALLOCATE)
+    add_definitions(-DROCKSDB_FALLOCATE_PRESENT)
+  endif()
+endif()
+
+CHECK_FUNCTION_EXISTS(malloc_usable_size HAVE_MALLOC_USABLE_SIZE)
+if(HAVE_MALLOC_USABLE_SIZE)
+  add_definitions(-DROCKSDB_MALLOC_USABLE_SIZE)
+endif()
+
+include_directories(${ROCKSDB_SOURCE_DIR})
+include_directories(${ROCKSDB_SOURCE_DIR}/include)
+include_directories(SYSTEM ${ROCKSDB_SOURCE_DIR}/third-party/gtest-1.7.0/fused-src)
+
+find_package(Threads REQUIRED)
+if(WIN32)
+  set(SYSTEM_LIBS ${SYSTEM_LIBS} Shlwapi.lib Rpcrt4.lib)
+else()
+  set(SYSTEM_LIBS ${SYSTEM_LIBS} ${CMAKE_THREAD_LIBS_INIT} ${LIBRT} ${CMAKE_DL_LIBS}) # append: keeps -latomic added above for riscv64
+endif()
+
+set(ROCKSDB_LIBS rocksdblib)
+set(LIBS ${ROCKSDB_LIBS} ${THIRDPARTY_LIBS} ${SYSTEM_LIBS})
+
+#add_subdirectory(${ROCKSDB_SOURCE_DIR}/tools)
+
+# Main library source code
+# Note : RocksDB has a lot of unittests. We should not include these files
+# in the build, because 1. they are not needed and 2. gtest causes warnings
+# in windows build, which are treated as errors and cause the build to fail.
+#
+# Unit tests themselves:
+# - *_test.cc
+# - *_bench.cc
+#
+# - table/mock_table.cc
+# - utilities/cassandra/cassandra_compaction_filter.cc
+# - utilities/cassandra/format.cc
+# - utilities/cassandra/merge_operator.cc
+# - utilities/cassandra/test_utils.cc
+#
+set(ROCKSDB_SOURCES
+  cache/clock_cache.cc
+  cache/lru_cache.cc
+  cache/sharded_cache.cc
+  db/arena_wrapped_db_iter.cc
+  db/builder.cc
+  db/c.cc
+  db/column_family.cc
+  db/compacted_db_impl.cc
+  db/compaction/compaction.cc
+  db/compaction/compaction_iterator.cc
+  db/compaction/compaction_picker.cc
+  db/compaction/compaction_job.cc
+  db/compaction/compaction_picker_fifo.cc
+  db/compaction/compaction_picker_level.cc
+  db/compaction/compaction_picker_universal.cc
+  db/convenience.cc
+  db/db_filesnapshot.cc
+  db/db_impl/db_impl.cc
+  db/db_impl/db_impl_write.cc
+  db/db_impl/db_impl_compaction_flush.cc
+  db/db_impl/db_impl_files.cc
+  db/db_impl/db_impl_open.cc
+  db/db_impl/db_impl_debug.cc
+  db/db_impl/db_impl_experimental.cc
+  db/db_impl/db_impl_readonly.cc
+  db/db_impl/db_impl_secondary.cc
+  db/db_info_dumper.cc
+  db/db_iter.cc
+ 
db/dbformat.cc + db/error_handler.cc + db/event_helpers.cc + db/experimental.cc + db/external_sst_file_ingestion_job.cc + db/file_indexer.cc + db/flush_job.cc + db/flush_scheduler.cc + db/forward_iterator.cc + db/import_column_family_job.cc + db/internal_stats.cc + db/logs_with_prep_tracker.cc + db/log_reader.cc + db/log_writer.cc + db/malloc_stats.cc + db/memtable.cc + db/memtable_list.cc + db/merge_helper.cc + db/merge_operator.cc + db/range_del_aggregator.cc + db/range_tombstone_fragmenter.cc + db/repair.cc + db/snapshot_impl.cc + db/table_cache.cc + db/table_properties_collector.cc + db/transaction_log_impl.cc + db/trim_history_scheduler.cc + db/version_builder.cc + db/version_edit.cc + db/version_set.cc + db/wal_manager.cc + db/write_batch.cc + db/write_batch_base.cc + db/write_controller.cc + db/write_thread.cc + env/env.cc + env/env_chroot.cc + env/env_encryption.cc + env/env_hdfs.cc + env/file_system.cc + env/mock_env.cc + file/delete_scheduler.cc + file/file_prefetch_buffer.cc + file/file_util.cc + file/filename.cc + file/random_access_file_reader.cc + file/read_write_util.cc + file/readahead_raf.cc + file/sequence_file_reader.cc + file/sst_file_manager_impl.cc + file/writable_file_writer.cc + logging/auto_roll_logger.cc + logging/event_logger.cc + logging/log_buffer.cc + memory/arena.cc + memory/concurrent_arena.cc + memory/jemalloc_nodump_allocator.cc + memtable/alloc_tracker.cc + memtable/hash_linklist_rep.cc + memtable/hash_skiplist_rep.cc + memtable/skiplistrep.cc + memtable/vectorrep.cc + memtable/write_buffer_manager.cc + monitoring/histogram.cc + monitoring/histogram_windowing.cc + monitoring/in_memory_stats_history.cc + monitoring/instrumented_mutex.cc + monitoring/iostats_context.cc + monitoring/perf_context.cc + monitoring/perf_level.cc + monitoring/persistent_stats_history.cc + monitoring/statistics.cc + monitoring/thread_status_impl.cc + monitoring/thread_status_updater.cc + monitoring/thread_status_util.cc + 
monitoring/thread_status_util_debug.cc + options/cf_options.cc + options/db_options.cc + options/options.cc + options/options_helper.cc + options/options_parser.cc + options/options_sanity_check.cc + port/stack_trace.cc + table/adaptive/adaptive_table_factory.cc + table/block_based/block.cc + table/block_based/block_based_filter_block.cc + table/block_based/block_based_table_builder.cc + table/block_based/block_based_table_factory.cc + table/block_based/block_based_table_reader.cc + table/block_based/block_builder.cc + table/block_based/block_prefix_index.cc + table/block_based/data_block_hash_index.cc + table/block_based/data_block_footer.cc + table/block_based/filter_block_reader_common.cc + table/block_based/filter_policy.cc + table/block_based/flush_block_policy.cc + table/block_based/full_filter_block.cc + table/block_based/index_builder.cc + table/block_based/parsed_full_filter_block.cc + table/block_based/partitioned_filter_block.cc + table/block_based/uncompression_dict_reader.cc + table/block_fetcher.cc + table/cuckoo/cuckoo_table_builder.cc + table/cuckoo/cuckoo_table_factory.cc + table/cuckoo/cuckoo_table_reader.cc + table/format.cc + table/get_context.cc + table/iterator.cc + table/merging_iterator.cc + table/meta_blocks.cc + table/persistent_cache_helper.cc + table/plain/plain_table_bloom.cc + table/plain/plain_table_builder.cc + table/plain/plain_table_factory.cc + table/plain/plain_table_index.cc + table/plain/plain_table_key_coding.cc + table/plain/plain_table_reader.cc + table/sst_file_reader.cc + table/sst_file_writer.cc + table/table_properties.cc + table/two_level_iterator.cc + test_util/sync_point.cc + test_util/sync_point_impl.cc + test_util/testutil.cc + test_util/transaction_test_util.cc + tools/block_cache_analyzer/block_cache_trace_analyzer.cc + tools/dump/db_dump_tool.cc + tools/ldb_cmd.cc + tools/ldb_tool.cc + tools/sst_dump_tool.cc + tools/trace_analyzer_tool.cc + trace_replay/trace_replay.cc + trace_replay/block_cache_tracer.cc + 
util/coding.cc + util/compaction_job_stats_impl.cc + util/comparator.cc + util/compression_context_cache.cc + util/concurrent_task_limiter_impl.cc + util/crc32c.cc + util/dynamic_bloom.cc + util/hash.cc + util/murmurhash.cc + util/random.cc + util/rate_limiter.cc + util/slice.cc + util/file_checksum_helper.cc + util/status.cc + util/string_util.cc + util/thread_local.cc + util/threadpool_imp.cc + util/xxhash.cc + utilities/backupable/backupable_db.cc + utilities/blob_db/blob_compaction_filter.cc + utilities/blob_db/blob_db.cc + utilities/blob_db/blob_db_impl.cc + utilities/blob_db/blob_db_impl_filesnapshot.cc + utilities/blob_db/blob_dump_tool.cc + utilities/blob_db/blob_file.cc + utilities/blob_db/blob_log_reader.cc + utilities/blob_db/blob_log_writer.cc + utilities/blob_db/blob_log_format.cc + utilities/checkpoint/checkpoint_impl.cc + utilities/compaction_filters/remove_emptyvalue_compactionfilter.cc + utilities/debug.cc + utilities/env_mirror.cc + utilities/env_timed.cc + utilities/leveldb_options/leveldb_options.cc + utilities/memory/memory_util.cc + utilities/merge_operators/bytesxor.cc + utilities/merge_operators/max.cc + utilities/merge_operators/put.cc + utilities/merge_operators/sortlist.cc + utilities/merge_operators/string_append/stringappend.cc + utilities/merge_operators/string_append/stringappend2.cc + utilities/merge_operators/uint64add.cc + utilities/object_registry.cc + utilities/option_change_migration/option_change_migration.cc + utilities/options/options_util.cc + utilities/persistent_cache/block_cache_tier.cc + utilities/persistent_cache/block_cache_tier_file.cc + utilities/persistent_cache/block_cache_tier_metadata.cc + utilities/persistent_cache/persistent_cache_tier.cc + utilities/persistent_cache/volatile_tier_impl.cc + utilities/simulator_cache/cache_simulator.cc + utilities/simulator_cache/sim_cache.cc + utilities/table_properties_collectors/compact_on_deletion_collector.cc + utilities/trace/file_trace_reader_writer.cc + 
utilities/transactions/optimistic_transaction_db_impl.cc + utilities/transactions/optimistic_transaction.cc + utilities/transactions/pessimistic_transaction.cc + utilities/transactions/pessimistic_transaction_db.cc + utilities/transactions/snapshot_checker.cc + utilities/transactions/transaction_base.cc + utilities/transactions/transaction_db_mutex_impl.cc + utilities/transactions/transaction_lock_mgr.cc + utilities/transactions/transaction_util.cc + utilities/transactions/write_prepared_txn.cc + utilities/transactions/write_prepared_txn_db.cc + utilities/transactions/write_unprepared_txn.cc + utilities/transactions/write_unprepared_txn_db.cc + utilities/ttl/db_ttl_impl.cc + utilities/write_batch_with_index/write_batch_with_index.cc + utilities/write_batch_with_index/write_batch_with_index_internal.cc +) + + +if(WIN32) + list(APPEND ROCKSDB_SOURCES + port/win/io_win.cc + port/win/env_win.cc + port/win/env_default.cc + port/win/port_win.cc + port/win/win_logger.cc + port/win/win_thread.cc + port/win/xpress_win.cc) +else() + list(APPEND ROCKSDB_SOURCES + port/port_posix.cc + env/env_posix.cc + env/io_posix.cc + env/fs_posix.cc) + # ppc64 or ppc64le + if(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64") + enable_language(ASM) + list(APPEND ROCKSDB_SOURCES + util/crc32c_ppc.c + util/crc32c_ppc_asm.S) + endif(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc64") + # aarch + if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64") + INCLUDE(CheckCXXCompilerFlag) + CHECK_CXX_COMPILER_FLAG("-march=armv8-a+crc+crypto" HAS_ARMV8_CRC) + if(HAS_ARMV8_CRC) + message(STATUS " HAS_ARMV8_CRC yes") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+crc+crypto -Wno-unused-function") + list(APPEND ROCKSDB_SOURCES + util/crc32c_arm64.cc) + endif(HAS_ARMV8_CRC) + endif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|AARCH64") +endif() +SET(SOURCES) +FOREACH(s ${ROCKSDB_SOURCES}) + list(APPEND SOURCES ${ROCKSDB_SOURCE_DIR}/${s}) +ENDFOREACH() + +if(MSVC) + add_definitions(-DHAVE_SSE42 -DHAVE_PCLMUL) + # Workaround 
broken compilation with -DWIN32_LEAN_AND_MEAN
+  # (https://github.com/facebook/rocksdb/issues/4344)
+  set_source_files_properties(${ROCKSDB_SOURCE_DIR}/port/win/env_win.cc
+    PROPERTIES COMPILE_FLAGS "/FI\"windows.h\" /FI\"winioctl.h\"")
+
+  # Workaround Win8.1 SDK bug, that breaks /permissive-
+  string(REPLACE "/permissive-" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+else()
+  set(CMAKE_REQUIRED_FLAGS "-msse4.2 -mpclmul ${CXX11_FLAGS}")
+
+  CHECK_CXX_SOURCE_COMPILES("
+#include <cstdint>
+#include <nmmintrin.h>
+#include <wmmintrin.h>
+int main() {
+  volatile uint32_t x = _mm_crc32_u32(0, 0);
+  const auto a = _mm_set_epi64x(0, 0);
+  const auto b = _mm_set_epi64x(0, 0);
+  const auto c = _mm_clmulepi64_si128(a, b, 0x00);
+  auto d = _mm_cvtsi128_si64(c);
+}
+" HAVE_SSE42)
+  if(HAVE_SSE42)
+    set_source_files_properties(${ROCKSDB_SOURCE_DIR}/util/crc32c.cc
+      PROPERTIES COMPILE_FLAGS "-DHAVE_SSE42 -DHAVE_PCLMUL -msse4.2 -mpclmul")
+  endif()
+  unset(CMAKE_REQUIRED_FLAGS)
+endif()
+
+IF(CMAKE_VERSION VERSION_GREATER "2.8.10")
+  STRING(TIMESTAMP GIT_DATE_TIME "%Y-%m-%d %H:%M:%S")
+ENDIF()
+
+CONFIGURE_FILE(${ROCKSDB_SOURCE_DIR}/util/build_version.cc.in build_version.cc @ONLY)
+INCLUDE_DIRECTORIES(${ROCKSDB_SOURCE_DIR}/util)
+list(APPEND SOURCES ${CMAKE_CURRENT_BINARY_DIR}/build_version.cc)
+
+ADD_CONVENIENCE_LIBRARY(rocksdblib ${SOURCES})
+target_link_libraries(rocksdblib ${THIRDPARTY_LIBS} ${SYSTEM_LIBS})
+IF(CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+  set_target_properties(rocksdblib PROPERTIES COMPILE_FLAGS "-fPIC -fno-builtin-memcmp -Wno-error")
+endif()
diff --git a/storage/rocksdb/event_listener.cc b/storage/rocksdb/event_listener.cc
new file mode 100644
index 00000000..4995e407
--- /dev/null
+++ b/storage/rocksdb/event_listener.cc
@@ -0,0 +1,96 @@
+/*
+   Copyright (c) 2015, Facebook, Inc.
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ + +#include + +/* The C++ file's header */ +#include "./event_listener.h" + +/* C++ standard header files */ +#include +#include + +/* MySQL includes */ +#include + +/* MyRocks includes */ +#include "./ha_rocksdb.h" +#include "./properties_collector.h" +#include "./rdb_datadic.h" + +namespace myrocks { + +/* + Collect the index statistics recorded in the table properties of every + file named in `files` and return them concatenated in file order. + Files that have no entry in `props` contribute nothing. +*/ +static std::vector extract_index_stats( + const std::vector &files, + const rocksdb::TablePropertiesCollection &props) { + std::vector ret; + /* Bind by const reference so that each file name is not copied. */ + for (const auto &fn : files) { + const auto it = props.find(fn); + DBUG_ASSERT(it != props.end()); + /* The assertion above is a debug-build check only; never dereference + end() when the properties for a file are missing. */ + if (it == props.end()) { + continue; + } + std::vector stats; + Rdb_tbl_prop_coll::read_stats_from_tbl_props(it->second, &stats); + ret.insert(ret.end(), stats.begin(), stats.end()); + } + return ret; +} + +void Rdb_event_listener::update_index_stats( + const rocksdb::TableProperties &props) { + DBUG_ASSERT(m_ddl_manager != nullptr); + const auto tbl_props = + std::make_shared(props); + + std::vector stats; + Rdb_tbl_prop_coll::read_stats_from_tbl_props(tbl_props, &stats); + + m_ddl_manager->adjust_stats(stats); +} + +void Rdb_event_listener::OnCompactionCompleted( + rocksdb::DB *db, const rocksdb::CompactionJobInfo &ci) { + DBUG_ASSERT(db != nullptr); + DBUG_ASSERT(m_ddl_manager != nullptr); + + if (ci.status.ok()) { + m_ddl_manager->adjust_stats( + extract_index_stats(ci.output_files, ci.table_properties), 
extract_index_stats(ci.input_files, ci.table_properties)); + } +} + +void Rdb_event_listener::OnFlushCompleted( + rocksdb::DB *db, const rocksdb::FlushJobInfo &flush_job_info) { + DBUG_ASSERT(db != nullptr); + update_index_stats(flush_job_info.table_properties); +} + +void Rdb_event_listener::OnExternalFileIngested( + rocksdb::DB *db, const rocksdb::ExternalFileIngestionInfo &info) { + DBUG_ASSERT(db != nullptr); + update_index_stats(info.table_properties); +} + +void Rdb_event_listener::OnBackgroundError( + rocksdb::BackgroundErrorReason reason, rocksdb::Status *status) { + rdb_log_status_error(*status, "Error detected in background"); + // NO_LINT_DEBUG + sql_print_error("RocksDB: BackgroundErrorReason: %d", (int)reason); + if (status->IsCorruption()) { + rdb_persist_corruption_marker(); + abort(); + } +} +} // namespace myrocks diff --git a/storage/rocksdb/event_listener.h b/storage/rocksdb/event_listener.h new file mode 100644 index 00000000..737973eb --- /dev/null +++ b/storage/rocksdb/event_listener.h @@ -0,0 +1,49 @@ +/* + Copyright (c) 2015, Facebook, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ +#pragma once + +#include "rocksdb/listener.h" + +namespace myrocks { + +class Rdb_ddl_manager; + +class Rdb_event_listener : public rocksdb::EventListener { + public: + Rdb_event_listener(const Rdb_event_listener &) = delete; + Rdb_event_listener &operator=(const Rdb_event_listener &) = delete; + + explicit Rdb_event_listener(Rdb_ddl_manager *const ddl_manager) + : m_ddl_manager(ddl_manager) {} + + void OnCompactionCompleted(rocksdb::DB *db, + const rocksdb::CompactionJobInfo &ci) override; + void OnFlushCompleted(rocksdb::DB *db, + const rocksdb::FlushJobInfo &flush_job_info) override; + void OnExternalFileIngested( + rocksdb::DB *db, + const rocksdb::ExternalFileIngestionInfo &ingestion_info) override; + + void OnBackgroundError(rocksdb::BackgroundErrorReason reason, + rocksdb::Status *status) override; + + private: + Rdb_ddl_manager *m_ddl_manager; + + void update_index_stats(const rocksdb::TableProperties &props); +}; + +} // namespace myrocks diff --git a/storage/rocksdb/get_rocksdb_files.sh b/storage/rocksdb/get_rocksdb_files.sh new file mode 100755 index 00000000..bd5128a8 --- /dev/null +++ b/storage/rocksdb/get_rocksdb_files.sh @@ -0,0 +1,27 @@ +#!/bin/bash +MKFILE=`mktemp` +# create and run a simple makefile +# include rocksdb make file relative to the path of this script +echo "include ./storage/rocksdb/rocksdb/src.mk +all: + @echo \$(LIB_SOURCES)" > $MKFILE +for f in `make --makefile $MKFILE` +do + echo ./rocksdb/$f +done +rm $MKFILE + +# create build_version.cc file. 
Only create one if it doesn't exists or if it is different +# this is so that we don't rebuild mysqld every time +bv=storage/rocksdb/rocksdb/util/build_version.cc +date=$(date +%F) +git_sha=$(pushd storage/rocksdb/rocksdb >/dev/null && git rev-parse HEAD 2>/dev/null && popd >/dev/null) +if [ ! -f $bv ] || [ -z $git_sha ] || [ ! `grep $git_sha $bv` ] +then +echo "#include \"build_version.h\" +const char* rocksdb_build_git_sha = +\"rocksdb_build_git_sha:$git_sha\"; +const char* rocksdb_build_git_date = +\"rocksdb_build_git_date:$date\"; +const char* rocksdb_build_compile_date = __DATE__;" > $bv +fi diff --git a/storage/rocksdb/ha_rocksdb.cc b/storage/rocksdb/ha_rocksdb.cc new file mode 100644 index 00000000..f89601b1 --- /dev/null +++ b/storage/rocksdb/ha_rocksdb.cc @@ -0,0 +1,14638 @@ +/* + Copyright (c) 2012, Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ + +#ifdef USE_PRAGMA_IMPLEMENTATION +#pragma implementation // gcc: Class implementation +#endif + +#define MYSQL_SERVER 1 + +/* For use of 'PRIu64': */ +#define __STDC_FORMAT_MACROS + +#include + +#include + +/* The C++ file's header */ +#include "./ha_rocksdb.h" + +/* C++ standard header files */ +#include +#include +#include +#include +#include +#include +#include +#include + +/* MySQL includes */ +#include +// #include +#include +#include "./debug_sync.h" +#include "./my_bit.h" +#include "./my_stacktrace.h" +#include "./my_sys.h" +#include "./sql_audit.h" +#include "./sql_table.h" +#include "./sql_hset.h" +#ifdef MARIAROCKS_NOT_YET +#endif + +/* RocksDB includes */ +#include "monitoring/histogram.h" +#include "rocksdb/compaction_filter.h" +#include "rocksdb/env.h" +#include "rocksdb/memory_allocator.h" +#include "rocksdb/persistent_cache.h" +#include "rocksdb/rate_limiter.h" +#include "rocksdb/slice_transform.h" +#include "rocksdb/thread_status.h" +#include "rocksdb/utilities/checkpoint.h" +#include "rocksdb/utilities/convenience.h" +#include "rocksdb/utilities/memory_util.h" +#include "rocksdb/utilities/sim_cache.h" +#include "rocksdb/utilities/write_batch_with_index.h" +#include "util/stop_watch.h" +#include "./rdb_source_revision.h" + +// MariaRocks: this is needed to access RocksDB debug syncpoints: +#include "test_util/sync_point.h" + +/* MyRocks includes */ +#include "./event_listener.h" +#include "./ha_rocksdb_proto.h" +#include "./logger.h" +#include "./nosql_access.h" +#include "./rdb_cf_manager.h" +#include "./rdb_cf_options.h" +#include "./rdb_converter.h" +#include "./rdb_datadic.h" +#include "./rdb_i_s.h" +#include "./rdb_index_merge.h" +#include "./rdb_mutex_wrapper.h" +#include "./rdb_psi.h" +#include "./rdb_threads.h" +#include 
"./rdb_mariadb_server_port.h" + +// Internal MySQL APIs not exposed in any header. +extern "C" { +/** + Mark transaction to rollback and mark error as fatal to a sub-statement. + @param thd Thread handle + @param all TRUE <=> rollback main transaction. +*/ +void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all); + +/** + * Get the user thread's binary logging format + * @param thd user thread + * @return Value to be used as index into the binlog_format_names array + */ +int thd_binlog_format(const MYSQL_THD thd); + +/** + * Check if binary logging is filtered for thread's current db. + * @param thd Thread handle + * @retval 1 the query is not filtered, 0 otherwise. + */ +bool thd_binlog_filter_ok(const MYSQL_THD thd); +} + +extern my_bool opt_core_file; + +// Needed in rocksdb_init_func +void ignore_db_dirs_append(const char *dirname_arg); + + +namespace myrocks { + +static st_global_stats global_stats; +static st_export_stats export_stats; +static st_memory_stats memory_stats; +static st_io_stall_stats io_stall_stats; + +const std::string DEFAULT_CF_NAME("default"); +const std::string DEFAULT_SYSTEM_CF_NAME("__system__"); +const std::string PER_INDEX_CF_NAME("$per_index_cf"); + +static std::vector rdb_indexes_to_recalc; + +#ifdef MARIADB_NOT_YET +class Rdb_explicit_snapshot : public explicit_snapshot { + public: + static std::shared_ptr create( + snapshot_info_st *ss_info, rocksdb::DB *db, + const rocksdb::Snapshot *snapshot) { + std::lock_guard lock(explicit_snapshot_mutex); + auto s = std::unique_ptr( + new rocksdb::ManagedSnapshot(db, snapshot)); + if (!s) { + return nullptr; + } + ss_info->snapshot_id = ++explicit_snapshot_counter; + auto ret = std::make_shared(*ss_info, std::move(s)); + if (!ret) { + return nullptr; + } + explicit_snapshots[ss_info->snapshot_id] = ret; + return ret; + } + + static std::string dump_snapshots() { + std::string str; + std::lock_guard lock(explicit_snapshot_mutex); + for (const auto &elem : explicit_snapshots) { + const 
auto &ss = elem.second.lock(); + DBUG_ASSERT(ss != nullptr); + const auto &info = ss->ss_info; + str += "\nSnapshot ID: " + std::to_string(info.snapshot_id) + + "\nBinlog File: " + info.binlog_file + + "\nBinlog Pos: " + std::to_string(info.binlog_pos) + + "\nGtid Executed: " + info.gtid_executed + "\n"; + } + + return str; + } + + static std::shared_ptr get( + const ulonglong snapshot_id) { + std::lock_guard lock(explicit_snapshot_mutex); + auto elem = explicit_snapshots.find(snapshot_id); + if (elem == explicit_snapshots.end()) { + return nullptr; + } + return elem->second.lock(); + } + + rocksdb::ManagedSnapshot *get_snapshot() { return snapshot.get(); } + + Rdb_explicit_snapshot(snapshot_info_st ss_info, + std::unique_ptr &&snapshot) + : explicit_snapshot(ss_info), snapshot(std::move(snapshot)) {} + + virtual ~Rdb_explicit_snapshot() { + std::lock_guard lock(explicit_snapshot_mutex); + explicit_snapshots.erase(ss_info.snapshot_id); + } + + private: + std::unique_ptr snapshot; + + static std::mutex explicit_snapshot_mutex; + static ulonglong explicit_snapshot_counter; + static std::unordered_map> + explicit_snapshots; +}; + +std::mutex Rdb_explicit_snapshot::explicit_snapshot_mutex; +ulonglong Rdb_explicit_snapshot::explicit_snapshot_counter = 0; +std::unordered_map> + Rdb_explicit_snapshot::explicit_snapshots; +#endif + +/** + Updates row counters based on the table type and operation type. +*/ +void ha_rocksdb::update_row_stats(const operation_type &type) { + DBUG_ASSERT(type < ROWS_MAX); + // Find if we are modifying system databases. 
+ if (table->s && m_tbl_def->m_is_mysql_system_table) { + global_stats.system_rows[type].inc(); + } else { + global_stats.rows[type].inc(); + } +} + +void dbug_dump_database(rocksdb::DB *db); +static handler *rocksdb_create_handler(my_core::handlerton *hton, + my_core::TABLE_SHARE *table_arg, + my_core::MEM_ROOT *mem_root); + +static rocksdb::CompactRangeOptions getCompactRangeOptions( + int concurrency = 0) { + rocksdb::CompactRangeOptions compact_range_options; + compact_range_options.bottommost_level_compaction = + rocksdb::BottommostLevelCompaction::kForce; + compact_range_options.exclusive_manual_compaction = false; + if (concurrency > 0) { + compact_range_options.max_subcompactions = concurrency; + } + return compact_range_options; +} + +/////////////////////////////////////////////////////////// +// Parameters and settings +/////////////////////////////////////////////////////////// +static char *rocksdb_default_cf_options = nullptr; +static char *rocksdb_override_cf_options = nullptr; +static char *rocksdb_update_cf_options = nullptr; + +/////////////////////////////////////////////////////////// +// Globals +/////////////////////////////////////////////////////////// +handlerton *rocksdb_hton; + +rocksdb::TransactionDB *rdb = nullptr; +rocksdb::HistogramImpl *commit_latency_stats = nullptr; + +static std::shared_ptr rocksdb_stats; +static std::unique_ptr flashcache_aware_env; +static std::shared_ptr properties_collector_factory; + +Rdb_dict_manager dict_manager; +Rdb_cf_manager cf_manager; +Rdb_ddl_manager ddl_manager; +Rdb_binlog_manager binlog_manager; + +#if !defined(_WIN32) && !defined(__APPLE__) +Rdb_io_watchdog *io_watchdog = nullptr; +#endif +/** + MyRocks background thread control + N.B. 
This is besides RocksDB's own background threads + (@see rocksdb::CancelAllBackgroundWork()) +*/ + +static Rdb_background_thread rdb_bg_thread; + +static Rdb_manual_compaction_thread rdb_mc_thread; + +// List of table names (using regex) that are exceptions to the strict +// collation check requirement. +Regex_list_handler *rdb_collation_exceptions; + +static const char **rdb_get_error_messages(int nr); + +static void rocksdb_flush_all_memtables() { + const Rdb_cf_manager &cf_manager = rdb_get_cf_manager(); + for (const auto &cf_handle : cf_manager.get_all_cf()) { + rdb->Flush(rocksdb::FlushOptions(), cf_handle); + } +} + +static void rocksdb_delete_column_family_stub( + THD *const /* thd */, struct st_mysql_sys_var *const /* var */, + void *const /* var_ptr */, const void *const /* save */) {} + +static int rocksdb_delete_column_family( + THD *const /* thd */, struct st_mysql_sys_var *const /* var */, + void *const /* var_ptr */, struct st_mysql_value *const value) { + // Return failure for now until the race condition between creating + // CF and deleting CF is resolved + return HA_EXIT_FAILURE; + + char buff[STRING_BUFFER_USUAL_SIZE]; + int len = sizeof(buff); + + DBUG_ASSERT(value != nullptr); + + if (const char *const cf = value->val_str(value, buff, &len)) { + auto &cf_manager = rdb_get_cf_manager(); + auto ret = cf_manager.drop_cf(cf); + if (ret == HA_EXIT_SUCCESS) { + // NO_LINT_DEBUG + sql_print_information("RocksDB: Dropped column family: %s\n", cf); + } else { + // NO_LINT_DEBUG + sql_print_error("RocksDB: Failed to drop column family: %s, error: %d\n", + cf, ret); + } + + return ret; + } + + return HA_EXIT_SUCCESS; +} + +/////////////////////////////////////////////////////////// +// Hash map: table name => open table handler +/////////////////////////////////////////////////////////// + +namespace // anonymous namespace = not visible outside this source file +{ + +typedef Hash_set Rdb_table_set; + +class Rdb_open_tables_map { + private: + /* Hash table 
used to track the handlers of open tables */ + std::unordered_map m_table_map; + + /* The mutex used to protect the hash table */ + mutable mysql_mutex_t m_mutex; + + public: + void init() { + m_table_map.clear(); + mysql_mutex_init(rdb_psi_open_tbls_mutex_key, &m_mutex, MY_MUTEX_INIT_FAST); + } + + void free() { + m_table_map.clear(); + mysql_mutex_destroy(&m_mutex); + } + size_t count() { return m_table_map.size(); } + + Rdb_table_handler *get_table_handler(const char *const table_name); + void release_table_handler(Rdb_table_handler *const table_handler); + + std::vector get_table_names(void) const; +}; + +} // anonymous namespace + +static Rdb_open_tables_map rdb_open_tables; + +static std::string rdb_normalize_dir(std::string dir) { + while (dir.size() > 0 && dir.back() == '/') { + dir.resize(dir.size() - 1); + } + return dir; +} + +static int rocksdb_create_checkpoint( + THD *const thd MY_ATTRIBUTE((__unused__)), + struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), + void *const save MY_ATTRIBUTE((__unused__)), + struct st_mysql_value *const value) { + char buf[FN_REFLEN]; + int len = sizeof(buf); + const char *const checkpoint_dir_raw = value->val_str(value, buf, &len); + if (checkpoint_dir_raw) { + if (rdb != nullptr) { + std::string checkpoint_dir = rdb_normalize_dir(checkpoint_dir_raw); + // NO_LINT_DEBUG + sql_print_information("RocksDB: creating checkpoint in directory : %s\n", + checkpoint_dir.c_str()); + rocksdb::Checkpoint *checkpoint; + auto status = rocksdb::Checkpoint::Create(rdb, &checkpoint); + // We can only return HA_EXIT_FAILURE/HA_EXIT_SUCCESS here which is why + // the return code is ignored, but by calling into rdb_error_to_mysql, + // it will call my_error for us, which will propagate up to the client. 
+ int rc __attribute__((__unused__)); + if (status.ok()) { + status = checkpoint->CreateCheckpoint(checkpoint_dir.c_str()); + delete checkpoint; + if (status.ok()) { + // NO_LINT_DEBUG + sql_print_information( + "RocksDB: created checkpoint in directory : %s\n", + checkpoint_dir.c_str()); + return HA_EXIT_SUCCESS; + } else { + rc = ha_rocksdb::rdb_error_to_mysql(status); + } + } else { + rc = ha_rocksdb::rdb_error_to_mysql(status); + } + } + } + return HA_EXIT_FAILURE; +} + +/* This method is needed to indicate that the + ROCKSDB_CREATE_CHECKPOINT command is not read-only */ +static void rocksdb_create_checkpoint_stub(THD *const thd, + struct st_mysql_sys_var *const var, + void *const var_ptr, + const void *const save) {} + +static void rocksdb_force_flush_memtable_now_stub( + THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr, + const void *const save) {} + +static int rocksdb_force_flush_memtable_now( + THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr, + struct st_mysql_value *const value) { + // NO_LINT_DEBUG + sql_print_information("RocksDB: Manual memtable flush."); + rocksdb_flush_all_memtables(); + return HA_EXIT_SUCCESS; +} + +static void rocksdb_force_flush_memtable_and_lzero_now_stub( + THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr, + const void *const save) {} + +static int rocksdb_force_flush_memtable_and_lzero_now( + THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr, + struct st_mysql_value *const value) { + // NO_LINT_DEBUG + sql_print_information("RocksDB: Manual memtable and L0 flush."); + rocksdb_flush_all_memtables(); + + const Rdb_cf_manager &cf_manager = rdb_get_cf_manager(); + rocksdb::CompactionOptions c_options = rocksdb::CompactionOptions(); + rocksdb::ColumnFamilyMetaData metadata; + rocksdb::ColumnFamilyDescriptor cf_descr; + + int i, max_attempts = 3, num_errors = 0; + + for (const auto &cf_handle : cf_manager.get_all_cf()) { + for (i = 0; i < 
max_attempts; i++) { + rdb->GetColumnFamilyMetaData(cf_handle, &metadata); + cf_handle->GetDescriptor(&cf_descr); + c_options.output_file_size_limit = cf_descr.options.target_file_size_base; + + DBUG_ASSERT(metadata.levels[0].level == 0); + std::vector file_names; + for (auto &file : metadata.levels[0].files) { + file_names.emplace_back(file.db_path + file.name); + } + + if (file_names.empty()) { + break; + } + + rocksdb::Status s; + s = rdb->CompactFiles(c_options, cf_handle, file_names, 1); + + // Due to a race, it's possible for CompactFiles to collide + // with auto compaction, causing an error to return + // regarding file not found. In that case, retry. + if (s.IsInvalidArgument()) { + continue; + } + + if (!s.ok() && !s.IsAborted()) { + rdb_handle_io_error(s, RDB_IO_ERROR_GENERAL); + return HA_EXIT_FAILURE; + } + break; + } + if (i == max_attempts) { + num_errors++; + } + } + + return num_errors == 0 ? HA_EXIT_SUCCESS : HA_EXIT_FAILURE; +} + +static void rocksdb_drop_index_wakeup_thread( + my_core::THD *const thd MY_ATTRIBUTE((__unused__)), + struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), + void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save); + +static my_bool rocksdb_pause_background_work = 0; +static mysql_mutex_t rdb_sysvars_mutex; +static mysql_mutex_t rdb_block_cache_resize_mutex; + +static void rocksdb_set_pause_background_work( + my_core::THD *const, + struct st_mysql_sys_var *const, + void *const, const void *const save) { + RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); + const my_bool pause_requested = *static_cast(save); + if (rocksdb_pause_background_work != pause_requested) { + if (pause_requested) { + rdb->PauseBackgroundWork(); + } else { + rdb->ContinueBackgroundWork(); + } + rocksdb_pause_background_work = pause_requested; + } + RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); +} + +static void rocksdb_set_compaction_options(THD *thd, + struct st_mysql_sys_var *var, + void *var_ptr, const void *save); + +static void 
rocksdb_set_table_stats_sampling_pct(THD *thd, + struct st_mysql_sys_var *var, + void *var_ptr, + const void *save); + +static void rocksdb_set_rate_limiter_bytes_per_sec(THD *thd, + struct st_mysql_sys_var *var, + void *var_ptr, + const void *save); + +static void rocksdb_set_sst_mgr_rate_bytes_per_sec(THD *thd, + struct st_mysql_sys_var *var, + void *var_ptr, + const void *save); + +static void rocksdb_set_delayed_write_rate(THD *thd, + struct st_mysql_sys_var *var, + void *var_ptr, const void *save); + +static void rocksdb_set_max_latest_deadlocks(THD *thd, + struct st_mysql_sys_var *var, + void *var_ptr, const void *save); + +static void rdb_set_collation_exception_list(const char *exception_list); +static void rocksdb_set_collation_exception_list(THD *thd, + struct st_mysql_sys_var *var, + void *var_ptr, + const void *save); + +static int rocksdb_validate_update_cf_options(THD *thd, + struct st_mysql_sys_var *var, + void *save, + st_mysql_value *value); + +static void rocksdb_set_update_cf_options(THD *thd, + struct st_mysql_sys_var *var, + void *var_ptr, const void *save); + +static int rocksdb_check_bulk_load( + THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), + void *save, struct st_mysql_value *value); + +static int rocksdb_check_bulk_load_allow_unsorted( + THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), + void *save, struct st_mysql_value *value); + +static void rocksdb_set_max_background_jobs(THD *thd, + struct st_mysql_sys_var *const var, + void *const var_ptr, + const void *const save); +static void rocksdb_set_bytes_per_sync(THD *thd, + struct st_mysql_sys_var *const var, + void *const var_ptr, + const void *const save); +static void rocksdb_set_wal_bytes_per_sync(THD *thd, + struct st_mysql_sys_var *const var, + void *const var_ptr, + const void *const save); +static int rocksdb_validate_set_block_cache_size( + THD *thd, struct st_mysql_sys_var *const var, void *var_ptr, + struct st_mysql_value 
*value); +////////////////////////////////////////////////////////////////////////////// +// Options definitions +////////////////////////////////////////////////////////////////////////////// +static long long rocksdb_block_cache_size; +static long long rocksdb_sim_cache_size; +static my_bool rocksdb_use_clock_cache; +static double rocksdb_cache_high_pri_pool_ratio; +static my_bool rocksdb_cache_dump; +/* Use unsigned long long instead of uint64_t because of MySQL compatibility */ +static unsigned long long // NOLINT(runtime/int) + rocksdb_rate_limiter_bytes_per_sec; +static unsigned long long // NOLINT(runtime/int) + rocksdb_sst_mgr_rate_bytes_per_sec; +static unsigned long long rocksdb_delayed_write_rate; +static uint32_t rocksdb_max_latest_deadlocks; +static unsigned long // NOLINT(runtime/int) + rocksdb_persistent_cache_size_mb; +static ulong rocksdb_info_log_level; +static char *rocksdb_wal_dir; +static char *rocksdb_persistent_cache_path; +static ulong rocksdb_index_type; +static uint32_t rocksdb_flush_log_at_trx_commit; +static uint32_t rocksdb_debug_optimizer_n_rows; +static my_bool rocksdb_force_compute_memtable_stats; +static uint32_t rocksdb_force_compute_memtable_stats_cachetime; +static my_bool rocksdb_debug_optimizer_no_zero_cardinality; +static uint32_t rocksdb_wal_recovery_mode; +static uint32_t rocksdb_stats_level; +static uint32_t rocksdb_access_hint_on_compaction_start; +static char *rocksdb_compact_cf_name; +static char *rocksdb_delete_cf_name; +static char *rocksdb_checkpoint_name; +static my_bool rocksdb_signal_drop_index_thread; +static my_bool rocksdb_signal_remove_mariabackup_checkpoint; +static my_bool rocksdb_strict_collation_check = 1; +static my_bool rocksdb_ignore_unknown_options = 1; +static my_bool rocksdb_enable_2pc = 0; +static char *rocksdb_strict_collation_exceptions; +static my_bool rocksdb_collect_sst_properties = 1; +static my_bool rocksdb_force_flush_memtable_now_var = 0; +static my_bool 
rocksdb_force_flush_memtable_and_lzero_now_var = 0; +static my_bool rocksdb_enable_ttl = 1; +static my_bool rocksdb_enable_ttl_read_filtering = 1; +static int rocksdb_debug_ttl_rec_ts = 0; +static int rocksdb_debug_ttl_snapshot_ts = 0; +static int rocksdb_debug_ttl_read_filter_ts = 0; +static my_bool rocksdb_debug_ttl_ignore_pk = 0; +static my_bool rocksdb_reset_stats = 0; +static uint32_t rocksdb_io_write_timeout_secs = 0; +static uint32_t rocksdb_seconds_between_stat_computes = 3600; +static long long rocksdb_compaction_sequential_deletes = 0l; +static long long rocksdb_compaction_sequential_deletes_window = 0l; +static long long rocksdb_compaction_sequential_deletes_file_size = 0l; +static uint32_t rocksdb_validate_tables = 1; +static char *rocksdb_datadir; +static uint32_t rocksdb_table_stats_sampling_pct; +static my_bool rocksdb_enable_bulk_load_api = 1; +static my_bool rocksdb_print_snapshot_conflict_queries = 0; +static my_bool rocksdb_large_prefix = 0; +static my_bool rocksdb_allow_to_start_after_corruption = 0; +static char* rocksdb_git_hash; + +char *compression_types_val= + const_cast(get_rocksdb_supported_compression_types()); +static unsigned long rocksdb_write_policy = + rocksdb::TxnDBWritePolicy::WRITE_COMMITTED; + +#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported +char *rocksdb_read_free_rpl_tables; +std::mutex rocksdb_read_free_rpl_tables_mutex; +#if defined(HAVE_PSI_INTERFACE) +Regex_list_handler rdb_read_free_regex_handler(key_rwlock_read_free_rpl_tables); +#else +Regex_list_handler rdb_read_free_regex_handler; +#endif +enum read_free_rpl_type { OFF = 0, PK_ONLY, PK_SK }; +static unsigned long rocksdb_read_free_rpl = read_free_rpl_type::OFF; +#endif + +static my_bool rocksdb_error_on_suboptimal_collation = 1; +static uint32_t rocksdb_stats_recalc_rate = 0; +static uint32_t rocksdb_debug_manual_compaction_delay = 0; +static uint32_t rocksdb_max_manual_compactions = 0; +static my_bool rocksdb_rollback_on_timeout = FALSE; +static 
my_bool rocksdb_enable_insert_with_update_caching = TRUE; + +std::atomic rocksdb_row_lock_deadlocks(0); +std::atomic rocksdb_row_lock_wait_timeouts(0); +std::atomic rocksdb_snapshot_conflict_errors(0); +std::atomic rocksdb_wal_group_syncs(0); +std::atomic rocksdb_manual_compactions_processed(0); +std::atomic rocksdb_manual_compactions_running(0); +#ifndef DBUG_OFF +std::atomic rocksdb_num_get_for_update_calls(0); +#endif + + + +/* + Remove directory with files in it. + Used to remove checkpoint created by mariabackup. + + Only the entries directly inside dir are deleted (with my_delete), after + which dir itself is removed with rmdir; the first failing step stops the + work. Returns HA_EXIT_SUCCESS when the directory was removed and a + non-zero value otherwise (including when dir is not accessible). +*/ +#ifdef _WIN32 +#include /* unlink*/ +#ifndef F_OK +#define F_OK 0 +#endif +#endif + +static int rmdir_force(const char *dir) { + if (access(dir, F_OK)) + return true; + + char path[FN_REFLEN]; + char sep[] = {FN_LIBCHAR, 0}; + int err = 0; + + MY_DIR *dir_info = my_dir(dir, MYF(MY_DONT_SORT | MY_WANT_STAT)); + if (!dir_info) + return 1; + + for (uint i = 0; i < dir_info->number_of_files; i++) { + FILEINFO *file = dir_info->dir_entry + i; + + /* strxnmov() copies at most len bytes and then stores the terminating + NUL, so len must leave one byte of path free for it. The argument + list is terminated with NullS, as elsewhere in this file. */ + strxnmov(path, sizeof(path) - 1, dir, sep, file->name, NullS); + + err = my_delete(path, 0); + + if (err) { + break; + } + } + + my_dirend(dir_info); + + if (!err) + err = rmdir(dir); + + return (err == 0) ? 
HA_EXIT_SUCCESS : HA_EXIT_FAILURE; +} + + +static void rocksdb_remove_mariabackup_checkpoint( + my_core::THD *const, + struct st_mysql_sys_var *const , + void *const var_ptr, const void *const) { + std::string mariabackup_checkpoint_dir(rocksdb_datadir); + + mariabackup_checkpoint_dir.append("/mariabackup-checkpoint"); + + if (unlink(mariabackup_checkpoint_dir.c_str()) == 0) + return; + + rmdir_force(mariabackup_checkpoint_dir.c_str()); +} + + +static std::unique_ptr rdb_init_rocksdb_db_options(void) { + auto o = std::unique_ptr(new rocksdb::DBOptions()); + + o->create_if_missing = true; + o->listeners.push_back(std::make_shared(&ddl_manager)); + o->info_log_level = rocksdb::InfoLogLevel::INFO_LEVEL; + o->max_subcompactions = DEFAULT_SUBCOMPACTIONS; + o->max_open_files = -2; // auto-tune to 50% open_files_limit + + o->two_write_queues = true; + o->manual_wal_flush = true; + return o; +} + +/* DBOptions contains Statistics and needs to be destructed last */ +static std::unique_ptr rocksdb_tbl_options = + std::unique_ptr( + new rocksdb::BlockBasedTableOptions()); +static std::unique_ptr rocksdb_db_options = + rdb_init_rocksdb_db_options(); + +static std::shared_ptr rocksdb_rate_limiter; + +/* This enum needs to be kept up to date with rocksdb::TxnDBWritePolicy */ +static const char *write_policy_names[] = {"write_committed", "write_prepared", + "write_unprepared", NullS}; + +static TYPELIB write_policy_typelib = {array_elements(write_policy_names) - 1, + "write_policy_typelib", + write_policy_names, nullptr}; + +#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported +/* This array needs to be kept up to date with myrocks::read_free_rpl_type */ +static const char *read_free_rpl_names[] = {"OFF", "PK_ONLY", "PK_SK", NullS}; + +static TYPELIB read_free_rpl_typelib = {array_elements(read_free_rpl_names) - 1, + "read_free_rpl_typelib", + read_free_rpl_names, nullptr}; +#endif + +/* This enum needs to be kept up to date with rocksdb::InfoLogLevel */ +static 
const char *info_log_level_names[] = {"debug_level", "info_level", + "warn_level", "error_level", + "fatal_level", NullS}; + +static TYPELIB info_log_level_typelib = { + array_elements(info_log_level_names) - 1, "info_log_level_typelib", + info_log_level_names, nullptr}; + +static void rocksdb_set_rocksdb_info_log_level( + THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr, + const void *const save) { + DBUG_ASSERT(save != nullptr); + + RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); + rocksdb_info_log_level = *static_cast(save); + rocksdb_db_options->info_log->SetInfoLogLevel( + static_cast(rocksdb_info_log_level)); + RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); +} + +static void rocksdb_set_rocksdb_stats_level(THD *const thd, + struct st_mysql_sys_var *const var, + void *const var_ptr, + const void *const save) { + DBUG_ASSERT(save != nullptr); + + RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); + rocksdb_db_options->statistics->set_stats_level( + static_cast( + *static_cast(save))); + // Actual stats level is defined at rocksdb dbopt::statistics::stats_level_ + // so adjusting rocksdb_stats_level here to make sure it points to + // the correct stats level. + rocksdb_stats_level = rocksdb_db_options->statistics->get_stats_level(); + RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); +} + +static void rocksdb_set_reset_stats( + my_core::THD *const /* unused */, + my_core::st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), + void *const var_ptr, const void *const save) { + DBUG_ASSERT(save != nullptr); + DBUG_ASSERT(rdb != nullptr); + DBUG_ASSERT(rocksdb_stats != nullptr); + + RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); + + *static_cast(var_ptr) = *static_cast(save); + + if (rocksdb_reset_stats) { + rocksdb::Status s = rdb->ResetStats(); + + // RocksDB will always return success. Let's document this assumption here + // as well so that we'll get immediately notified when contract changes. 
+ DBUG_ASSERT(s == rocksdb::Status::OK()); + + s = rocksdb_stats->Reset(); + DBUG_ASSERT(s == rocksdb::Status::OK()); + } + + RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); +} + +static void rocksdb_set_io_write_timeout( + my_core::THD *const thd MY_ATTRIBUTE((__unused__)), + my_core::st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), + void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) { + DBUG_ASSERT(save != nullptr); + DBUG_ASSERT(rdb != nullptr); +#if !defined(_WIN32) && !defined(__APPLE__) + DBUG_ASSERT(io_watchdog != nullptr); +#endif + + RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); + + const uint32_t new_val = *static_cast(save); + + rocksdb_io_write_timeout_secs = new_val; +#if !defined(_WIN32) && !defined(__APPLE__) + io_watchdog->reset_timeout(rocksdb_io_write_timeout_secs); +#endif + RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); +} + +enum rocksdb_flush_log_at_trx_commit_type : unsigned int { + FLUSH_LOG_NEVER = 0, + FLUSH_LOG_SYNC, + FLUSH_LOG_BACKGROUND, + FLUSH_LOG_MAX /* must be last */ +}; + +static int rocksdb_validate_flush_log_at_trx_commit( + THD *const thd, + struct st_mysql_sys_var *const var, /* in: pointer to system variable */ + void *var_ptr, /* out: immediate result for update function */ + struct st_mysql_value *const value /* in: incoming value */) { + long long new_value; + + /* value is NULL */ + if (value->val_int(value, &new_value)) { + return HA_EXIT_FAILURE; + } + + if (rocksdb_db_options->allow_mmap_writes && new_value != FLUSH_LOG_NEVER) { + return HA_EXIT_FAILURE; + } + + *static_cast(var_ptr) = static_cast(new_value); + return HA_EXIT_SUCCESS; +} +static void rocksdb_compact_column_family_stub( + THD *const thd, struct st_mysql_sys_var *const var, void *const var_ptr, + const void *const save) {} + +static int rocksdb_compact_column_family(THD *const thd, + struct st_mysql_sys_var *const var, + void *const var_ptr, + struct st_mysql_value *const value); + +static const char *index_type_names[] = 
{"kBinarySearch", "kHashSearch", NullS}; + +static TYPELIB index_type_typelib = {array_elements(index_type_names) - 1, + "index_type_typelib", index_type_names, + nullptr}; + +const ulong RDB_MAX_LOCK_WAIT_SECONDS = 1024 * 1024 * 1024; +const ulong RDB_DEFAULT_MAX_ROW_LOCKS = 1024 * 1024; +const ulong RDB_MAX_ROW_LOCKS = 1024 * 1024 * 1024; +const ulong RDB_DEFAULT_BULK_LOAD_SIZE = 1000; +const ulong RDB_MAX_BULK_LOAD_SIZE = 1024 * 1024 * 1024; +const size_t RDB_DEFAULT_MERGE_BUF_SIZE = 64 * 1024 * 1024; +const size_t RDB_MIN_MERGE_BUF_SIZE = 100; +const size_t RDB_DEFAULT_MERGE_COMBINE_READ_SIZE = 1024 * 1024 * 1024; +const size_t RDB_MIN_MERGE_COMBINE_READ_SIZE = 100; +const size_t RDB_DEFAULT_MERGE_TMP_FILE_REMOVAL_DELAY = 0; +const size_t RDB_MIN_MERGE_TMP_FILE_REMOVAL_DELAY = 0; +const int64 RDB_DEFAULT_BLOCK_CACHE_SIZE = 512 * 1024 * 1024; +const int64 RDB_MIN_BLOCK_CACHE_SIZE = 1024; +const int RDB_MAX_CHECKSUMS_PCT = 100; +const ulong RDB_DEADLOCK_DETECT_DEPTH = 50; + +// TODO: 0 means don't wait at all, and we don't support it yet? 
+static MYSQL_THDVAR_ULONG(lock_wait_timeout, PLUGIN_VAR_RQCMDARG, + "Number of seconds to wait for lock", nullptr, + nullptr, /*default*/ 1, /*min*/ 1, + /*max*/ RDB_MAX_LOCK_WAIT_SECONDS, 0); + +static MYSQL_THDVAR_BOOL(deadlock_detect, PLUGIN_VAR_RQCMDARG, + "Enables deadlock detection", nullptr, nullptr, FALSE); + +static MYSQL_THDVAR_ULONG(deadlock_detect_depth, PLUGIN_VAR_RQCMDARG, + "Number of transactions deadlock detection will " + "traverse through before assuming deadlock", + nullptr, nullptr, + /*default*/ RDB_DEADLOCK_DETECT_DEPTH, + /*min*/ 2, + /*max*/ ULONG_MAX, 0); + +static MYSQL_THDVAR_BOOL( + commit_time_batch_for_recovery, PLUGIN_VAR_RQCMDARG, + "TransactionOptions::commit_time_batch_for_recovery for RocksDB", nullptr, + nullptr, TRUE); + +static MYSQL_THDVAR_BOOL( + trace_sst_api, PLUGIN_VAR_RQCMDARG, + "Generate trace output in the log for each call to the SstFileWriter", + nullptr, nullptr, FALSE); + +static MYSQL_THDVAR_BOOL( + bulk_load, PLUGIN_VAR_RQCMDARG, + "Use bulk-load mode for inserts. This disables " + "unique_checks and enables rocksdb_commit_in_the_middle.", + rocksdb_check_bulk_load, nullptr, FALSE); + +static MYSQL_THDVAR_BOOL(bulk_load_allow_sk, PLUGIN_VAR_RQCMDARG, + "Allow bulk loading of sk keys during bulk-load. " + "Can be changed only when bulk load is disabled.", + /* Intentionally reuse unsorted's check function */ + rocksdb_check_bulk_load_allow_unsorted, nullptr, + FALSE); + +static MYSQL_THDVAR_BOOL(bulk_load_allow_unsorted, PLUGIN_VAR_RQCMDARG, + "Allow unsorted input during bulk-load. 
" + "Can be changed only when bulk load is disabled.", + rocksdb_check_bulk_load_allow_unsorted, nullptr, + FALSE); + +static MYSQL_SYSVAR_BOOL(enable_bulk_load_api, rocksdb_enable_bulk_load_api, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Enables using SstFileWriter for bulk loading", + nullptr, nullptr, rocksdb_enable_bulk_load_api); + +static MYSQL_SYSVAR_STR(git_hash, rocksdb_git_hash, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Git revision of the RocksDB library used by MyRocks", + nullptr, nullptr, ROCKSDB_GIT_HASH); + +static MYSQL_THDVAR_STR(tmpdir, PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_MEMALLOC, + "Directory for temporary files during DDL operations.", + nullptr, nullptr, ""); + +#define DEFAULT_SKIP_UNIQUE_CHECK_TABLES ".*" +static MYSQL_THDVAR_STR( + skip_unique_check_tables, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC, + "Skip unique constraint checking for the specified tables", nullptr, + nullptr, DEFAULT_SKIP_UNIQUE_CHECK_TABLES); + +static MYSQL_THDVAR_BOOL( + commit_in_the_middle, PLUGIN_VAR_RQCMDARG, + "Commit rows implicitly every rocksdb_bulk_load_size, on bulk load/insert, " + "update and delete", + nullptr, nullptr, FALSE); + +static MYSQL_THDVAR_BOOL( + blind_delete_primary_key, PLUGIN_VAR_RQCMDARG, + "Deleting rows by primary key lookup, without reading rows (Blind Deletes)." + " Blind delete is disabled if the table has secondary key", + nullptr, nullptr, FALSE); + +#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported + +static const char *DEFAULT_READ_FREE_RPL_TABLES = ".*"; + +static int rocksdb_validate_read_free_rpl_tables( + THD *thd MY_ATTRIBUTE((__unused__)), + struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), void *save, + struct st_mysql_value *value) { + char buff[STRING_BUFFER_USUAL_SIZE]; + int length = sizeof(buff); + const char *wlist_buf = value->val_str(value, buff, &length); + const auto wlist = wlist_buf ? 
wlist_buf : DEFAULT_READ_FREE_RPL_TABLES; + +#if defined(HAVE_PSI_INTERFACE) + Regex_list_handler regex_handler(key_rwlock_read_free_rpl_tables); +#else + Regex_list_handler regex_handler; +#endif + + if (!regex_handler.set_patterns(wlist)) { + warn_about_bad_patterns(®ex_handler, "rocksdb_read_free_rpl_tables"); + return HA_EXIT_FAILURE; + } + + *static_cast(save) = my_strdup(wlist, MYF(MY_WME)); + return HA_EXIT_SUCCESS; +} + +static void rocksdb_update_read_free_rpl_tables( + THD *thd MY_ATTRIBUTE((__unused__)), + struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), void *var_ptr, + const void *save) { + const auto wlist = *static_cast(save); + DBUG_ASSERT(wlist != nullptr); + + // This is bound to succeed since we've already checked for bad patterns in + // rocksdb_validate_read_free_rpl_tables + rdb_read_free_regex_handler.set_patterns(wlist); + + // update all table defs + struct Rdb_read_free_rpl_updater : public Rdb_tables_scanner { + int add_table(Rdb_tbl_def *tdef) override { + tdef->check_and_set_read_free_rpl_table(); + return HA_EXIT_SUCCESS; + } + } updater; + ddl_manager.scan_for_tables(&updater); + + if (wlist == DEFAULT_READ_FREE_RPL_TABLES) { + // If running SET var = DEFAULT, then rocksdb_validate_read_free_rpl_tables + // isn't called, and memory is never allocated for the value. Allocate it + // here. + *static_cast(var_ptr) = my_strdup(wlist, MYF(MY_WME)); + } else { + // Otherwise, we just reuse the value allocated from + // rocksdb_validate_read_free_rpl_tables. + *static_cast(var_ptr) = wlist; + } +} + +static MYSQL_SYSVAR_STR( + read_free_rpl_tables, rocksdb_read_free_rpl_tables, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC /*| PLUGIN_VAR_ALLOCATED*/, + "List of tables that will use read-free replication on the slave " + "(i.e. 
not lookup a row during replication)", + rocksdb_validate_read_free_rpl_tables, rocksdb_update_read_free_rpl_tables, + DEFAULT_READ_FREE_RPL_TABLES); + +static MYSQL_SYSVAR_ENUM( + read_free_rpl, rocksdb_read_free_rpl, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC, + "Use read-free replication on the slave (i.e. no row lookup during " + "replication). Default is OFF, PK_SK will enable it on all tables with " + "primary key. PK_ONLY will enable it on tables where the only key is the " + "primary key (i.e. no secondary keys).", + nullptr, nullptr, read_free_rpl_type::OFF, &read_free_rpl_typelib); +#endif + +static MYSQL_THDVAR_BOOL(skip_bloom_filter_on_read, PLUGIN_VAR_RQCMDARG, + "Skip using bloom filter for reads", nullptr, nullptr, + FALSE); + +static MYSQL_THDVAR_ULONG(max_row_locks, PLUGIN_VAR_RQCMDARG, + "Maximum number of locks a transaction can have", + nullptr, nullptr, + /*default*/ RDB_DEFAULT_MAX_ROW_LOCKS, + /*min*/ 1, + /*max*/ RDB_MAX_ROW_LOCKS, 0); + +static MYSQL_THDVAR_ULONGLONG( + write_batch_max_bytes, PLUGIN_VAR_RQCMDARG, + "Maximum size of write batch in bytes. 
0 means no limit.", nullptr, nullptr, + /* default */ 0, /* min */ 0, /* max */ SIZE_T_MAX, 1); + +static MYSQL_THDVAR_BOOL( + lock_scanned_rows, PLUGIN_VAR_RQCMDARG, + "Take and hold locks on rows that are scanned but not updated", nullptr, + nullptr, FALSE); + +static MYSQL_THDVAR_ULONG(bulk_load_size, PLUGIN_VAR_RQCMDARG, + "Max #records in a batch for bulk-load mode", nullptr, + nullptr, + /*default*/ RDB_DEFAULT_BULK_LOAD_SIZE, + /*min*/ 1, + /*max*/ RDB_MAX_BULK_LOAD_SIZE, 0); + +static MYSQL_THDVAR_ULONGLONG( + merge_buf_size, PLUGIN_VAR_RQCMDARG, + "Size to allocate for merge sort buffers written out to disk " + "during inplace index creation.", + nullptr, nullptr, + /* default (64MB) */ RDB_DEFAULT_MERGE_BUF_SIZE, + /* min (100B) */ RDB_MIN_MERGE_BUF_SIZE, + /* max */ SIZE_T_MAX, 1); + +static MYSQL_THDVAR_ULONGLONG( + merge_combine_read_size, PLUGIN_VAR_RQCMDARG, + "Size that we have to work with during combine (reading from disk) phase " + "of " + "external sort during fast index creation.", + nullptr, nullptr, + /* default (1GB) */ RDB_DEFAULT_MERGE_COMBINE_READ_SIZE, + /* min (100B) */ RDB_MIN_MERGE_COMBINE_READ_SIZE, + /* max */ SIZE_T_MAX, 1); + +static MYSQL_THDVAR_ULONGLONG( + merge_tmp_file_removal_delay_ms, PLUGIN_VAR_RQCMDARG, + "Fast index creation creates a large tmp file on disk during index " + "creation. Removing this large file all at once when index creation is " + "complete can cause trim stalls on Flash. This variable specifies a " + "duration to sleep (in milliseconds) between calling chsize() to truncate " + "the file in chunks. 
The chunk size is the same as merge_buf_size.", + nullptr, nullptr, + /* default (0ms) */ RDB_DEFAULT_MERGE_TMP_FILE_REMOVAL_DELAY, + /* min (0ms) */ RDB_MIN_MERGE_TMP_FILE_REMOVAL_DELAY, + /* max */ SIZE_T_MAX, 1); + +static MYSQL_THDVAR_INT( + manual_compaction_threads, PLUGIN_VAR_RQCMDARG, + "How many rocksdb threads to run for manual compactions", nullptr, nullptr, + /* default rocksdb.dboption max_subcompactions */ 0, + /* min */ 0, /* max */ 128, 0); + +static MYSQL_SYSVAR_BOOL( + create_if_missing, + *reinterpret_cast(&rocksdb_db_options->create_if_missing), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::create_if_missing for RocksDB", nullptr, nullptr, + rocksdb_db_options->create_if_missing); + +static MYSQL_SYSVAR_BOOL( + two_write_queues, + *reinterpret_cast(&rocksdb_db_options->two_write_queues), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::two_write_queues for RocksDB", nullptr, nullptr, + rocksdb_db_options->two_write_queues); + +static MYSQL_SYSVAR_BOOL( + manual_wal_flush, + *reinterpret_cast(&rocksdb_db_options->manual_wal_flush), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::manual_wal_flush for RocksDB", nullptr, nullptr, + rocksdb_db_options->manual_wal_flush); + +static MYSQL_SYSVAR_ENUM(write_policy, rocksdb_write_policy, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::write_policy for RocksDB", nullptr, + nullptr, rocksdb::TxnDBWritePolicy::WRITE_COMMITTED, + &write_policy_typelib); + +static MYSQL_SYSVAR_BOOL( + create_missing_column_families, + *reinterpret_cast( + &rocksdb_db_options->create_missing_column_families), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::create_missing_column_families for RocksDB", nullptr, nullptr, + rocksdb_db_options->create_missing_column_families); + +static MYSQL_SYSVAR_BOOL( + error_if_exists, + *reinterpret_cast(&rocksdb_db_options->error_if_exists), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::error_if_exists for RocksDB", 
nullptr, nullptr, + rocksdb_db_options->error_if_exists); + +static MYSQL_SYSVAR_BOOL( + paranoid_checks, + *reinterpret_cast(&rocksdb_db_options->paranoid_checks), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::paranoid_checks for RocksDB", nullptr, nullptr, + rocksdb_db_options->paranoid_checks); + +static MYSQL_SYSVAR_ULONGLONG( + rate_limiter_bytes_per_sec, rocksdb_rate_limiter_bytes_per_sec, + PLUGIN_VAR_RQCMDARG, "DBOptions::rate_limiter bytes_per_sec for RocksDB", + nullptr, rocksdb_set_rate_limiter_bytes_per_sec, /* default */ 0L, + /* min */ 0L, /* max */ MAX_RATE_LIMITER_BYTES_PER_SEC, 0); + +static MYSQL_SYSVAR_ULONGLONG( + sst_mgr_rate_bytes_per_sec, rocksdb_sst_mgr_rate_bytes_per_sec, + PLUGIN_VAR_RQCMDARG, + "DBOptions::sst_file_manager rate_bytes_per_sec for RocksDB", nullptr, + rocksdb_set_sst_mgr_rate_bytes_per_sec, + /* default */ DEFAULT_SST_MGR_RATE_BYTES_PER_SEC, + /* min */ 0L, /* max */ UINT64_MAX, 0); + +static MYSQL_SYSVAR_ULONGLONG(delayed_write_rate, rocksdb_delayed_write_rate, + PLUGIN_VAR_RQCMDARG, + "DBOptions::delayed_write_rate", nullptr, + rocksdb_set_delayed_write_rate, + rocksdb_db_options->delayed_write_rate, 0, + UINT64_MAX, 0); + +static MYSQL_SYSVAR_UINT(max_latest_deadlocks, rocksdb_max_latest_deadlocks, + PLUGIN_VAR_RQCMDARG, + "Maximum number of recent " + "deadlocks to store", + nullptr, rocksdb_set_max_latest_deadlocks, + rocksdb::kInitialMaxDeadlocks, 0, UINT32_MAX, 0); + +static MYSQL_SYSVAR_ENUM( + info_log_level, rocksdb_info_log_level, PLUGIN_VAR_RQCMDARG, + "Filter level for info logs to be written mysqld error log. 
" + "Valid values include 'debug_level', 'info_level', 'warn_level'" + "'error_level' and 'fatal_level'.", + nullptr, rocksdb_set_rocksdb_info_log_level, + rocksdb::InfoLogLevel::ERROR_LEVEL, &info_log_level_typelib); + +static MYSQL_THDVAR_INT( + perf_context_level, PLUGIN_VAR_RQCMDARG, + "Perf Context Level for rocksdb internal timer stat collection", nullptr, + nullptr, + /* default */ rocksdb::PerfLevel::kUninitialized, + /* min */ rocksdb::PerfLevel::kUninitialized, + /* max */ rocksdb::PerfLevel::kOutOfBounds - 1, 0); + +static MYSQL_SYSVAR_UINT( + wal_recovery_mode, rocksdb_wal_recovery_mode, PLUGIN_VAR_RQCMDARG, + "DBOptions::wal_recovery_mode for RocksDB. Default is kAbsoluteConsistency", + nullptr, nullptr, + /* default */ (uint)rocksdb::WALRecoveryMode::kAbsoluteConsistency, + /* min */ (uint)rocksdb::WALRecoveryMode::kTolerateCorruptedTailRecords, + /* max */ (uint)rocksdb::WALRecoveryMode::kSkipAnyCorruptedRecords, 0); + +static MYSQL_SYSVAR_UINT( + stats_level, rocksdb_stats_level, PLUGIN_VAR_RQCMDARG, + "Statistics Level for RocksDB. 
Default is 0 (kExceptHistogramOrTimers)", + nullptr, rocksdb_set_rocksdb_stats_level, + /* default */ (uint)rocksdb::StatsLevel::kExceptHistogramOrTimers, + /* min */ (uint)rocksdb::StatsLevel::kExceptHistogramOrTimers, + /* max */ (uint)rocksdb::StatsLevel::kAll, 0); + +static MYSQL_SYSVAR_SIZE_T(compaction_readahead_size, + rocksdb_db_options->compaction_readahead_size, + PLUGIN_VAR_RQCMDARG, + "DBOptions::compaction_readahead_size for RocksDB", + nullptr, nullptr, + rocksdb_db_options->compaction_readahead_size, + /* min */ 0L, /* max */ SIZE_T_MAX, 0); + +static MYSQL_SYSVAR_BOOL( + new_table_reader_for_compaction_inputs, + *reinterpret_cast( + &rocksdb_db_options->new_table_reader_for_compaction_inputs), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::new_table_reader_for_compaction_inputs for RocksDB", nullptr, + nullptr, rocksdb_db_options->new_table_reader_for_compaction_inputs); + +static MYSQL_SYSVAR_UINT( + access_hint_on_compaction_start, rocksdb_access_hint_on_compaction_start, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::access_hint_on_compaction_start for RocksDB", nullptr, nullptr, + /* default */ (uint)rocksdb::Options::AccessHint::NORMAL, + /* min */ (uint)rocksdb::Options::AccessHint::NONE, + /* max */ (uint)rocksdb::Options::AccessHint::WILLNEED, 0); + +static MYSQL_SYSVAR_BOOL( + allow_concurrent_memtable_write, + *reinterpret_cast( + &rocksdb_db_options->allow_concurrent_memtable_write), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::allow_concurrent_memtable_write for RocksDB", nullptr, nullptr, + false); + +static MYSQL_SYSVAR_BOOL( + enable_write_thread_adaptive_yield, + *reinterpret_cast( + &rocksdb_db_options->enable_write_thread_adaptive_yield), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::enable_write_thread_adaptive_yield for RocksDB", nullptr, + nullptr, false); + +static MYSQL_SYSVAR_INT(max_open_files, rocksdb_db_options->max_open_files, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, 
+ "DBOptions::max_open_files for RocksDB", nullptr, + nullptr, rocksdb_db_options->max_open_files, + /* min */ -2, /* max */ INT_MAX, 0); + +static MYSQL_SYSVAR_UINT64_T(max_total_wal_size, + rocksdb_db_options->max_total_wal_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::max_total_wal_size for RocksDB", nullptr, + nullptr, rocksdb_db_options->max_total_wal_size, + /* min */ 0, /* max */ LONGLONG_MAX, 0); + +static MYSQL_SYSVAR_BOOL( + use_fsync, *reinterpret_cast(&rocksdb_db_options->use_fsync), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::use_fsync for RocksDB", nullptr, nullptr, + rocksdb_db_options->use_fsync); + +static MYSQL_SYSVAR_STR(wal_dir, rocksdb_wal_dir, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::wal_dir for RocksDB", nullptr, nullptr, + rocksdb_db_options->wal_dir.c_str()); + +static MYSQL_SYSVAR_STR( + persistent_cache_path, rocksdb_persistent_cache_path, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Path for BlockBasedTableOptions::persistent_cache for RocksDB", nullptr, + nullptr, ""); + +static MYSQL_SYSVAR_ULONG( + persistent_cache_size_mb, rocksdb_persistent_cache_size_mb, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Size of cache in MB for BlockBasedTableOptions::persistent_cache " + "for RocksDB", + nullptr, nullptr, rocksdb_persistent_cache_size_mb, + /* min */ 0L, /* max */ ULONG_MAX, 0); + +static MYSQL_SYSVAR_UINT64_T( + delete_obsolete_files_period_micros, + rocksdb_db_options->delete_obsolete_files_period_micros, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::delete_obsolete_files_period_micros for RocksDB", nullptr, + nullptr, rocksdb_db_options->delete_obsolete_files_period_micros, + /* min */ 0, /* max */ LONGLONG_MAX, 0); + +static MYSQL_SYSVAR_INT(max_background_jobs, + rocksdb_db_options->max_background_jobs, + PLUGIN_VAR_RQCMDARG, + "DBOptions::max_background_jobs for RocksDB", nullptr, + rocksdb_set_max_background_jobs, + rocksdb_db_options->max_background_jobs, + 
/* min */ -1, /* max */ MAX_BACKGROUND_JOBS, 0); + +static MYSQL_SYSVAR_UINT(max_subcompactions, + rocksdb_db_options->max_subcompactions, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::max_subcompactions for RocksDB", nullptr, + nullptr, rocksdb_db_options->max_subcompactions, + /* min */ 1, /* max */ MAX_SUBCOMPACTIONS, 0); + +static MYSQL_SYSVAR_SIZE_T(max_log_file_size, + rocksdb_db_options->max_log_file_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::max_log_file_size for RocksDB", nullptr, + nullptr, rocksdb_db_options->max_log_file_size, + /* min */ 0L, /* max */ SIZE_T_MAX, 0); + +static MYSQL_SYSVAR_SIZE_T(log_file_time_to_roll, + rocksdb_db_options->log_file_time_to_roll, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::log_file_time_to_roll for RocksDB", + nullptr, nullptr, + rocksdb_db_options->log_file_time_to_roll, + /* min */ 0L, /* max */ SIZE_T_MAX, 0); + +static MYSQL_SYSVAR_SIZE_T(keep_log_file_num, + rocksdb_db_options->keep_log_file_num, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::keep_log_file_num for RocksDB", nullptr, + nullptr, rocksdb_db_options->keep_log_file_num, + /* min */ 0L, /* max */ SIZE_T_MAX, 0); + +static MYSQL_SYSVAR_UINT64_T(max_manifest_file_size, + rocksdb_db_options->max_manifest_file_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::max_manifest_file_size for RocksDB", + nullptr, nullptr, + rocksdb_db_options->max_manifest_file_size, + /* min */ 0L, /* max */ ULONGLONG_MAX, 0); + +static MYSQL_SYSVAR_INT(table_cache_numshardbits, + rocksdb_db_options->table_cache_numshardbits, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::table_cache_numshardbits for RocksDB", + nullptr, nullptr, + rocksdb_db_options->table_cache_numshardbits, + // LRUCache limits this to 19 bits, anything greater + // fails to create a cache and returns a nullptr + /* min */ 0, /* max */ 19, 0); + +static MYSQL_SYSVAR_UINT64_T(wal_ttl_seconds, 
rocksdb_db_options->WAL_ttl_seconds, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::WAL_ttl_seconds for RocksDB", nullptr, + nullptr, rocksdb_db_options->WAL_ttl_seconds, + /* min */ 0L, /* max */ LONGLONG_MAX, 0); + +static MYSQL_SYSVAR_UINT64_T(wal_size_limit_mb, + rocksdb_db_options->WAL_size_limit_MB, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::WAL_size_limit_MB for RocksDB", nullptr, + nullptr, rocksdb_db_options->WAL_size_limit_MB, + /* min */ 0L, /* max */ LONGLONG_MAX, 0); + +static MYSQL_SYSVAR_SIZE_T(manifest_preallocation_size, + rocksdb_db_options->manifest_preallocation_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::manifest_preallocation_size for RocksDB", + nullptr, nullptr, + rocksdb_db_options->manifest_preallocation_size, + /* min */ 0L, /* max */ SIZE_T_MAX, 0); + +static MYSQL_SYSVAR_BOOL( + use_direct_reads, + *reinterpret_cast(&rocksdb_db_options->use_direct_reads), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::use_direct_reads for RocksDB", nullptr, nullptr, + rocksdb_db_options->use_direct_reads); + +static MYSQL_SYSVAR_BOOL( + use_direct_io_for_flush_and_compaction, + *reinterpret_cast(&rocksdb_db_options->use_direct_io_for_flush_and_compaction), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::use_direct_io_for_flush_and_compaction for RocksDB", nullptr, nullptr, + rocksdb_db_options->use_direct_io_for_flush_and_compaction); + +static MYSQL_SYSVAR_BOOL( + allow_mmap_reads, + *reinterpret_cast(&rocksdb_db_options->allow_mmap_reads), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::allow_mmap_reads for RocksDB", nullptr, nullptr, + rocksdb_db_options->allow_mmap_reads); + +static MYSQL_SYSVAR_BOOL( + allow_mmap_writes, + *reinterpret_cast(&rocksdb_db_options->allow_mmap_writes), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::allow_mmap_writes for RocksDB", nullptr, nullptr, + rocksdb_db_options->allow_mmap_writes); + +static MYSQL_SYSVAR_BOOL( + 
is_fd_close_on_exec, + *reinterpret_cast(&rocksdb_db_options->is_fd_close_on_exec), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::is_fd_close_on_exec for RocksDB", nullptr, nullptr, + rocksdb_db_options->is_fd_close_on_exec); + +static MYSQL_SYSVAR_UINT(stats_dump_period_sec, + rocksdb_db_options->stats_dump_period_sec, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::stats_dump_period_sec for RocksDB", + nullptr, nullptr, + rocksdb_db_options->stats_dump_period_sec, + /* min */ 0, /* max */ INT_MAX, 0); + +static MYSQL_SYSVAR_BOOL( + advise_random_on_open, + *reinterpret_cast(&rocksdb_db_options->advise_random_on_open), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::advise_random_on_open for RocksDB", nullptr, nullptr, + rocksdb_db_options->advise_random_on_open); + +static MYSQL_SYSVAR_SIZE_T(db_write_buffer_size, + rocksdb_db_options->db_write_buffer_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::db_write_buffer_size for RocksDB", + nullptr, nullptr, + rocksdb_db_options->db_write_buffer_size, + /* min */ 0L, /* max */ SIZE_T_MAX, 0); + +static MYSQL_SYSVAR_BOOL( + use_adaptive_mutex, + *reinterpret_cast(&rocksdb_db_options->use_adaptive_mutex), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::use_adaptive_mutex for RocksDB", nullptr, nullptr, + rocksdb_db_options->use_adaptive_mutex); + +static MYSQL_SYSVAR_UINT64_T(bytes_per_sync, rocksdb_db_options->bytes_per_sync, + PLUGIN_VAR_RQCMDARG, + "DBOptions::bytes_per_sync for RocksDB", nullptr, + rocksdb_set_bytes_per_sync, + rocksdb_db_options->bytes_per_sync, + /* min */ 0L, /* max */ ULONGLONG_MAX, 0); + +static MYSQL_SYSVAR_UINT64_T(wal_bytes_per_sync, + rocksdb_db_options->wal_bytes_per_sync, + PLUGIN_VAR_RQCMDARG, + "DBOptions::wal_bytes_per_sync for RocksDB", nullptr, + rocksdb_set_wal_bytes_per_sync, + rocksdb_db_options->wal_bytes_per_sync, + /* min */ 0L, /* max */ ULONGLONG_MAX, 0); + +static MYSQL_SYSVAR_BOOL( + enable_thread_tracking, + 
*reinterpret_cast(&rocksdb_db_options->enable_thread_tracking), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "DBOptions::enable_thread_tracking for RocksDB", nullptr, nullptr, true); + +static MYSQL_SYSVAR_LONGLONG(block_cache_size, rocksdb_block_cache_size, + PLUGIN_VAR_RQCMDARG, + "block_cache size for RocksDB", + rocksdb_validate_set_block_cache_size, nullptr, + /* default */ RDB_DEFAULT_BLOCK_CACHE_SIZE, + /* min */ RDB_MIN_BLOCK_CACHE_SIZE, + /* max */ LLONG_MAX, + /* Block size */ RDB_MIN_BLOCK_CACHE_SIZE); + +static MYSQL_SYSVAR_LONGLONG(sim_cache_size, rocksdb_sim_cache_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Simulated cache size for RocksDB", nullptr, + nullptr, + /* default */ 0, + /* min */ 0, + /* max */ LLONG_MAX, + /* Block size */ 0); + +static MYSQL_SYSVAR_BOOL( + use_clock_cache, rocksdb_use_clock_cache, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Use ClockCache instead of default LRUCache for RocksDB", nullptr, nullptr, + false); + +static MYSQL_SYSVAR_BOOL(cache_dump, rocksdb_cache_dump, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Include RocksDB block cache content in core dump.", + nullptr, nullptr, true); + +static MYSQL_SYSVAR_DOUBLE(cache_high_pri_pool_ratio, + rocksdb_cache_high_pri_pool_ratio, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Specify the size of block cache high-pri pool", + nullptr, nullptr, /* default */ 0.0, /* min */ 0.0, + /* max */ 1.0, 0); + +static MYSQL_SYSVAR_BOOL( + cache_index_and_filter_blocks, + *reinterpret_cast( + &rocksdb_tbl_options->cache_index_and_filter_blocks), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "BlockBasedTableOptions::cache_index_and_filter_blocks for RocksDB", + nullptr, nullptr, true); + +static MYSQL_SYSVAR_BOOL( + cache_index_and_filter_with_high_priority, + *reinterpret_cast( + &rocksdb_tbl_options->cache_index_and_filter_blocks_with_high_priority), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "cache_index_and_filter_blocks_with_high_priority for RocksDB", 
nullptr, + nullptr, true); + +// When pin_l0_filter_and_index_blocks_in_cache is true, RocksDB will use the +// LRU cache, but will always keep the filter & index block's handle checked +// out (=won't call ShardedLRUCache::Release), plus the parsed out objects; +// the LRU cache will never flush them out, hence they're pinned. +// +// This fixes the mutex contention between ShardedLRUCache::Lookup and +// ShardedLRUCache::Release which reduced the QPS ratio (QPS using secondary +// index / QPS using PK). +static MYSQL_SYSVAR_BOOL( + pin_l0_filter_and_index_blocks_in_cache, + *reinterpret_cast( + &rocksdb_tbl_options->pin_l0_filter_and_index_blocks_in_cache), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "pin_l0_filter_and_index_blocks_in_cache for RocksDB", nullptr, nullptr, + true); + +static MYSQL_SYSVAR_ENUM(index_type, rocksdb_index_type, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "BlockBasedTableOptions::index_type for RocksDB", + nullptr, nullptr, + (ulong)rocksdb_tbl_options->index_type, + &index_type_typelib); + +static MYSQL_SYSVAR_BOOL( + hash_index_allow_collision, + *reinterpret_cast( + &rocksdb_tbl_options->hash_index_allow_collision), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "BlockBasedTableOptions::hash_index_allow_collision for RocksDB", nullptr, + nullptr, rocksdb_tbl_options->hash_index_allow_collision); + +static MYSQL_SYSVAR_BOOL( + no_block_cache, + *reinterpret_cast(&rocksdb_tbl_options->no_block_cache), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "BlockBasedTableOptions::no_block_cache for RocksDB", nullptr, nullptr, + rocksdb_tbl_options->no_block_cache); + +static MYSQL_SYSVAR_SIZE_T(block_size, rocksdb_tbl_options->block_size, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "BlockBasedTableOptions::block_size for RocksDB", + nullptr, nullptr, rocksdb_tbl_options->block_size, + /* min */ 1L, /* max */ SIZE_T_MAX, 0); + +static MYSQL_SYSVAR_INT( + block_size_deviation, rocksdb_tbl_options->block_size_deviation, + 
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "BlockBasedTableOptions::block_size_deviation for RocksDB", nullptr, + nullptr, rocksdb_tbl_options->block_size_deviation, + /* min */ 0, /* max */ INT_MAX, 0); + +static MYSQL_SYSVAR_INT( + block_restart_interval, rocksdb_tbl_options->block_restart_interval, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "BlockBasedTableOptions::block_restart_interval for RocksDB", nullptr, + nullptr, rocksdb_tbl_options->block_restart_interval, + /* min */ 1, /* max */ INT_MAX, 0); + +static MYSQL_SYSVAR_BOOL( + whole_key_filtering, + *reinterpret_cast(&rocksdb_tbl_options->whole_key_filtering), + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "BlockBasedTableOptions::whole_key_filtering for RocksDB", nullptr, nullptr, + rocksdb_tbl_options->whole_key_filtering); + +static MYSQL_SYSVAR_STR(default_cf_options, rocksdb_default_cf_options, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "default cf options for RocksDB", nullptr, nullptr, ""); + +static MYSQL_SYSVAR_STR(override_cf_options, rocksdb_override_cf_options, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "option overrides per cf for RocksDB", nullptr, nullptr, + ""); + +static MYSQL_SYSVAR_STR(update_cf_options, rocksdb_update_cf_options, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC + /* psergey-merge: need this? : PLUGIN_VAR_ALLOCATED*/, + "Option updates per column family for RocksDB", + rocksdb_validate_update_cf_options, + rocksdb_set_update_cf_options, nullptr); + +static MYSQL_SYSVAR_UINT(flush_log_at_trx_commit, + rocksdb_flush_log_at_trx_commit, PLUGIN_VAR_RQCMDARG, + "Sync on transaction commit. Similar to " + "innodb_flush_log_at_trx_commit. 
1: sync on commit, " + "0,2: not sync on commit", + rocksdb_validate_flush_log_at_trx_commit, nullptr, + /* default */ FLUSH_LOG_SYNC, + /* min */ FLUSH_LOG_NEVER, + /* max */ FLUSH_LOG_BACKGROUND, 0); + +static MYSQL_THDVAR_BOOL(write_disable_wal, PLUGIN_VAR_RQCMDARG, + "WriteOptions::disableWAL for RocksDB", nullptr, + nullptr, rocksdb::WriteOptions().disableWAL); + +static MYSQL_THDVAR_BOOL( + write_ignore_missing_column_families, PLUGIN_VAR_RQCMDARG, + "WriteOptions::ignore_missing_column_families for RocksDB", nullptr, + nullptr, rocksdb::WriteOptions().ignore_missing_column_families); + +static MYSQL_THDVAR_BOOL(skip_fill_cache, PLUGIN_VAR_RQCMDARG, + "Skip filling block cache on read requests", nullptr, + nullptr, FALSE); + +static MYSQL_THDVAR_BOOL( + unsafe_for_binlog, PLUGIN_VAR_RQCMDARG, + "Allowing statement based binary logging which may break consistency", + nullptr, nullptr, FALSE); + +static MYSQL_THDVAR_UINT(records_in_range, PLUGIN_VAR_RQCMDARG, + "Used to override the result of records_in_range(). 
" + "Set to a positive number to override", + nullptr, nullptr, 0, + /* min */ 0, /* max */ INT_MAX, 0); + +static MYSQL_THDVAR_UINT(force_index_records_in_range, PLUGIN_VAR_RQCMDARG, + "Used to override the result of records_in_range() " + "when FORCE INDEX is used.", + nullptr, nullptr, 0, + /* min */ 0, /* max */ INT_MAX, 0); + +static MYSQL_SYSVAR_UINT( + debug_optimizer_n_rows, rocksdb_debug_optimizer_n_rows, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY | PLUGIN_VAR_NOSYSVAR, + "Test only to override rocksdb estimates of table size in a memtable", + nullptr, nullptr, 0, /* min */ 0, /* max */ INT_MAX, 0); + +static MYSQL_SYSVAR_BOOL(force_compute_memtable_stats, + rocksdb_force_compute_memtable_stats, + PLUGIN_VAR_RQCMDARG, + "Force to always compute memtable stats", nullptr, + nullptr, TRUE); + +static MYSQL_SYSVAR_UINT(force_compute_memtable_stats_cachetime, + rocksdb_force_compute_memtable_stats_cachetime, + PLUGIN_VAR_RQCMDARG, + "Time in usecs to cache memtable estimates", nullptr, + nullptr, /* default */ 60 * 1000 * 1000, + /* min */ 0, /* max */ INT_MAX, 0); + +static MYSQL_SYSVAR_BOOL( + debug_optimizer_no_zero_cardinality, + rocksdb_debug_optimizer_no_zero_cardinality, PLUGIN_VAR_RQCMDARG, + "In case if cardinality is zero, overrides it with some value", nullptr, + nullptr, TRUE); + +static MYSQL_SYSVAR_STR(compact_cf, rocksdb_compact_cf_name, + PLUGIN_VAR_RQCMDARG, "Compact column family", + rocksdb_compact_column_family, + rocksdb_compact_column_family_stub, ""); + +static MYSQL_SYSVAR_STR(delete_cf, rocksdb_delete_cf_name, PLUGIN_VAR_RQCMDARG, + "Delete column family", rocksdb_delete_column_family, + rocksdb_delete_column_family_stub, ""); + +static MYSQL_SYSVAR_STR(create_checkpoint, rocksdb_checkpoint_name, + PLUGIN_VAR_RQCMDARG, "Checkpoint directory", + rocksdb_create_checkpoint, + rocksdb_create_checkpoint_stub, ""); + +static MYSQL_SYSVAR_BOOL(remove_mariabackup_checkpoint, + rocksdb_signal_remove_mariabackup_checkpoint, + 
PLUGIN_VAR_RQCMDARG, "Remove mariabackup checkpoint", + nullptr, rocksdb_remove_mariabackup_checkpoint, FALSE); + +static MYSQL_SYSVAR_BOOL(signal_drop_index_thread, + rocksdb_signal_drop_index_thread, PLUGIN_VAR_RQCMDARG, + "Wake up drop index thread", nullptr, + rocksdb_drop_index_wakeup_thread, FALSE); + +static MYSQL_SYSVAR_BOOL(pause_background_work, rocksdb_pause_background_work, + PLUGIN_VAR_RQCMDARG, + "Disable all rocksdb background operations", nullptr, + rocksdb_set_pause_background_work, FALSE); + +static MYSQL_SYSVAR_BOOL( + enable_ttl, rocksdb_enable_ttl, PLUGIN_VAR_RQCMDARG, + "Enable expired TTL records to be dropped during compaction.", nullptr, + nullptr, TRUE); + +static MYSQL_SYSVAR_BOOL( + enable_ttl_read_filtering, rocksdb_enable_ttl_read_filtering, + PLUGIN_VAR_RQCMDARG, + "For tables with TTL, expired records are skipped/filtered out during " + "processing and in query results. Disabling this will allow these records " + "to be seen, but as a result rows may disappear in the middle of " + "transactions as they are dropped during compaction. Use with caution.", + nullptr, nullptr, TRUE); + +static MYSQL_SYSVAR_INT( + debug_ttl_rec_ts, rocksdb_debug_ttl_rec_ts, PLUGIN_VAR_RQCMDARG, + "For debugging purposes only. Overrides the TTL of records to " + "now() + debug_ttl_rec_ts. The value can be +/- to simulate " + "a record inserted in the past vs a record inserted in the 'future'. " + "A value of 0 denotes that the variable is not set. This variable is a " + "no-op in non-debug builds.", + nullptr, nullptr, 0, /* min */ -3600, /* max */ 3600, 0); + +static MYSQL_SYSVAR_INT( + debug_ttl_snapshot_ts, rocksdb_debug_ttl_snapshot_ts, PLUGIN_VAR_RQCMDARG, + "For debugging purposes only. Sets the snapshot during compaction to " + "now() + debug_set_ttl_snapshot_ts. The value can be +/- to simulate " + "a snapshot in the past vs a snapshot created in the 'future'. " + "A value of 0 denotes that the variable is not set. 
This variable is a " + "no-op in non-debug builds.", + nullptr, nullptr, 0, /* min */ -3600, /* max */ 3600, 0); + +static MYSQL_SYSVAR_INT( + debug_ttl_read_filter_ts, rocksdb_debug_ttl_read_filter_ts, + PLUGIN_VAR_RQCMDARG, + "For debugging purposes only. Overrides the TTL read filtering time to " + "time + debug_ttl_read_filter_ts. A value of 0 denotes that the variable " + "is not set. This variable is a no-op in non-debug builds.", + nullptr, nullptr, 0, /* min */ -3600, /* max */ 3600, 0); + +static MYSQL_SYSVAR_BOOL( + debug_ttl_ignore_pk, rocksdb_debug_ttl_ignore_pk, PLUGIN_VAR_RQCMDARG, + "For debugging purposes only. If true, compaction filtering will not occur " + "on PK TTL data. This variable is a no-op in non-debug builds.", + nullptr, nullptr, FALSE); + +static MYSQL_SYSVAR_UINT( + max_manual_compactions, rocksdb_max_manual_compactions, PLUGIN_VAR_RQCMDARG, + "Maximum number of pending + ongoing number of manual compactions.", + nullptr, nullptr, /* default */ 10, /* min */ 0, /* max */ UINT_MAX, 0); + +static MYSQL_SYSVAR_BOOL( + rollback_on_timeout, rocksdb_rollback_on_timeout, PLUGIN_VAR_OPCMDARG, + "Whether to roll back the complete transaction or a single statement on " + "lock wait timeout (a single statement by default)", + NULL, NULL, FALSE); + +static MYSQL_SYSVAR_UINT( + debug_manual_compaction_delay, rocksdb_debug_manual_compaction_delay, + PLUGIN_VAR_RQCMDARG, + "For debugging purposes only. 
Sleeping specified seconds " + "for simulating long running compactions.", + nullptr, nullptr, 0, /* min */ 0, /* max */ UINT_MAX, 0); + +static MYSQL_SYSVAR_BOOL( + reset_stats, rocksdb_reset_stats, PLUGIN_VAR_RQCMDARG, + "Reset the RocksDB internal statistics without restarting the DB.", nullptr, + rocksdb_set_reset_stats, FALSE); + +static MYSQL_SYSVAR_UINT(io_write_timeout, rocksdb_io_write_timeout_secs, + PLUGIN_VAR_RQCMDARG, + "Timeout for experimental I/O watchdog.", nullptr, + rocksdb_set_io_write_timeout, /* default */ 0, + /* min */ 0L, + /* max */ UINT_MAX, 0); + +static MYSQL_SYSVAR_BOOL(enable_2pc, rocksdb_enable_2pc, PLUGIN_VAR_RQCMDARG, + "Enable two phase commit for MyRocks", nullptr, + nullptr, TRUE); + +static MYSQL_SYSVAR_BOOL(ignore_unknown_options, rocksdb_ignore_unknown_options, + PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, + "Enable ignoring unknown options passed to RocksDB", + nullptr, nullptr, TRUE); + +static MYSQL_SYSVAR_BOOL(strict_collation_check, rocksdb_strict_collation_check, + PLUGIN_VAR_RQCMDARG, + "Enforce case sensitive collation for MyRocks indexes", + nullptr, nullptr, TRUE); + +static MYSQL_SYSVAR_STR(strict_collation_exceptions, + rocksdb_strict_collation_exceptions, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_MEMALLOC, + "List of tables (using regex) that are excluded " + "from the case sensitive collation enforcement", + nullptr, rocksdb_set_collation_exception_list, ""); + +static MYSQL_SYSVAR_BOOL(collect_sst_properties, rocksdb_collect_sst_properties, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Enables collecting SST file properties on each flush", + nullptr, nullptr, rocksdb_collect_sst_properties); + +static MYSQL_SYSVAR_BOOL( + force_flush_memtable_now, rocksdb_force_flush_memtable_now_var, + PLUGIN_VAR_RQCMDARG, + "Forces memstore flush which may block all write requests so be careful", + rocksdb_force_flush_memtable_now, rocksdb_force_flush_memtable_now_stub, + FALSE); + +static MYSQL_SYSVAR_BOOL( + 
force_flush_memtable_and_lzero_now, + rocksdb_force_flush_memtable_and_lzero_now_var, PLUGIN_VAR_RQCMDARG, + "Acts similar to force_flush_memtable_now, but also compacts all L0 files.", + rocksdb_force_flush_memtable_and_lzero_now, + rocksdb_force_flush_memtable_and_lzero_now_stub, FALSE); + +static MYSQL_SYSVAR_UINT( + seconds_between_stat_computes, rocksdb_seconds_between_stat_computes, + PLUGIN_VAR_RQCMDARG, + "Sets a number of seconds to wait between optimizer stats recomputation. " + "Only changed indexes will be refreshed.", + nullptr, nullptr, rocksdb_seconds_between_stat_computes, + /* min */ 0L, /* max */ UINT_MAX, 0); + +static MYSQL_SYSVAR_LONGLONG(compaction_sequential_deletes, + rocksdb_compaction_sequential_deletes, + PLUGIN_VAR_RQCMDARG, + "RocksDB will trigger compaction for the file if " + "it has more than this number sequential deletes " + "per window", + nullptr, rocksdb_set_compaction_options, + DEFAULT_COMPACTION_SEQUENTIAL_DELETES, + /* min */ 0L, + /* max */ MAX_COMPACTION_SEQUENTIAL_DELETES, 0); + +static MYSQL_SYSVAR_LONGLONG( + compaction_sequential_deletes_window, + rocksdb_compaction_sequential_deletes_window, PLUGIN_VAR_RQCMDARG, + "Size of the window for counting rocksdb_compaction_sequential_deletes", + nullptr, rocksdb_set_compaction_options, + DEFAULT_COMPACTION_SEQUENTIAL_DELETES_WINDOW, + /* min */ 0L, /* max */ MAX_COMPACTION_SEQUENTIAL_DELETES_WINDOW, 0); + +static MYSQL_SYSVAR_LONGLONG( + compaction_sequential_deletes_file_size, + rocksdb_compaction_sequential_deletes_file_size, PLUGIN_VAR_RQCMDARG, + "Minimum file size required for compaction_sequential_deletes", nullptr, + rocksdb_set_compaction_options, 0L, + /* min */ -1L, /* max */ LLONG_MAX, 0); + +static MYSQL_SYSVAR_BOOL( + compaction_sequential_deletes_count_sd, + rocksdb_compaction_sequential_deletes_count_sd, PLUGIN_VAR_RQCMDARG, + "Counting SingleDelete as rocksdb_compaction_sequential_deletes", nullptr, + nullptr, rocksdb_compaction_sequential_deletes_count_sd); + 
+static MYSQL_SYSVAR_BOOL( + print_snapshot_conflict_queries, rocksdb_print_snapshot_conflict_queries, + PLUGIN_VAR_RQCMDARG, + "Logging queries that got snapshot conflict errors into *.err log", nullptr, + nullptr, rocksdb_print_snapshot_conflict_queries); + +static MYSQL_THDVAR_INT(checksums_pct, PLUGIN_VAR_RQCMDARG, + "How many percentages of rows to be checksummed", + nullptr, nullptr, RDB_MAX_CHECKSUMS_PCT, + /* min */ 0, /* max */ RDB_MAX_CHECKSUMS_PCT, 0); + +static MYSQL_THDVAR_BOOL(store_row_debug_checksums, PLUGIN_VAR_RQCMDARG, + "Include checksums when writing index/table records", + nullptr, nullptr, false /* default value */); + +static MYSQL_THDVAR_BOOL(verify_row_debug_checksums, PLUGIN_VAR_RQCMDARG, + "Verify checksums when reading index/table records", + nullptr, nullptr, false /* default value */); + +static MYSQL_THDVAR_BOOL(master_skip_tx_api, PLUGIN_VAR_RQCMDARG, + "Skipping holding any lock on row access. " + "Not effective on slave.", + nullptr, nullptr, false); + +static MYSQL_SYSVAR_UINT( + validate_tables, rocksdb_validate_tables, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Verify all .frm files match all RocksDB tables (0 means no verification, " + "1 means verify and fail on error, and 2 means verify but continue", + nullptr, nullptr, 1 /* default value */, 0 /* min value */, + 2 /* max value */, 0); + +static MYSQL_SYSVAR_STR(datadir, rocksdb_datadir, + PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, + "RocksDB data directory", nullptr, nullptr, + "./#rocksdb"); + +static MYSQL_SYSVAR_STR(supported_compression_types, + compression_types_val, + PLUGIN_VAR_NOCMDOPT | PLUGIN_VAR_READONLY, + "Compression algorithms supported by RocksDB", + nullptr, nullptr, + compression_types_val); + +static MYSQL_SYSVAR_UINT( + table_stats_sampling_pct, rocksdb_table_stats_sampling_pct, + PLUGIN_VAR_RQCMDARG, + "Percentage of entries to sample when collecting statistics about table " + "properties. 
Specify either 0 to sample everything or percentage " + "[" STRINGIFY_ARG(RDB_TBL_STATS_SAMPLE_PCT_MIN) ".." STRINGIFY_ARG( + RDB_TBL_STATS_SAMPLE_PCT_MAX) "]. " + "By default " STRINGIFY_ARG( + RDB_DEFAULT_TBL_STATS_SAMPLE_PCT) "% " + "of" + " e" + "nt" + "ri" + "es" + " a" + "re" + " " + "sa" + "mp" + "le" + "d" + ".", + nullptr, rocksdb_set_table_stats_sampling_pct, /* default */ + RDB_DEFAULT_TBL_STATS_SAMPLE_PCT, /* everything */ 0, + /* max */ RDB_TBL_STATS_SAMPLE_PCT_MAX, 0); + +static MYSQL_SYSVAR_UINT( + stats_recalc_rate, rocksdb_stats_recalc_rate, PLUGIN_VAR_RQCMDARG, + "The number of indexes per second to recalculate statistics for. 0 to " + "disable background recalculation.", + nullptr, nullptr, 0 /* default value */, 0 /* min value */, + UINT_MAX /* max value */, 0); + +static MYSQL_SYSVAR_BOOL( + large_prefix, rocksdb_large_prefix, PLUGIN_VAR_RQCMDARG, + "Support large index prefix length of 3072 bytes. If off, the maximum " + "index prefix length is 767.", + nullptr, nullptr, FALSE); + +static MYSQL_SYSVAR_BOOL( + allow_to_start_after_corruption, rocksdb_allow_to_start_after_corruption, + PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, + "Allow server still to start successfully even if RocksDB corruption is " + "detected.", + nullptr, nullptr, FALSE); + +static MYSQL_SYSVAR_BOOL(error_on_suboptimal_collation, + rocksdb_error_on_suboptimal_collation, + PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, + "Raise an error instead of warning if a sub-optimal " + "collation is used", + nullptr, nullptr, TRUE); + +static MYSQL_SYSVAR_BOOL( + enable_insert_with_update_caching, + rocksdb_enable_insert_with_update_caching, PLUGIN_VAR_OPCMDARG, + "Whether to enable optimization where we cache the read from a failed " + "insertion attempt in INSERT ON DUPLICATE KEY UPDATE", + nullptr, nullptr, TRUE); + +static const int ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE = 100; + +static struct st_mysql_sys_var *rocksdb_system_variables[] = { + MYSQL_SYSVAR(lock_wait_timeout), + 
MYSQL_SYSVAR(deadlock_detect), + MYSQL_SYSVAR(deadlock_detect_depth), + MYSQL_SYSVAR(commit_time_batch_for_recovery), + MYSQL_SYSVAR(max_row_locks), + MYSQL_SYSVAR(write_batch_max_bytes), + MYSQL_SYSVAR(lock_scanned_rows), + MYSQL_SYSVAR(bulk_load), + MYSQL_SYSVAR(bulk_load_allow_sk), + MYSQL_SYSVAR(bulk_load_allow_unsorted), + MYSQL_SYSVAR(skip_unique_check_tables), + MYSQL_SYSVAR(trace_sst_api), + MYSQL_SYSVAR(commit_in_the_middle), + MYSQL_SYSVAR(blind_delete_primary_key), +#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported + MYSQL_SYSVAR(read_free_rpl_tables), + MYSQL_SYSVAR(read_free_rpl), +#endif + MYSQL_SYSVAR(bulk_load_size), + MYSQL_SYSVAR(merge_buf_size), + MYSQL_SYSVAR(enable_bulk_load_api), + MYSQL_SYSVAR(tmpdir), + MYSQL_SYSVAR(merge_combine_read_size), + MYSQL_SYSVAR(merge_tmp_file_removal_delay_ms), + MYSQL_SYSVAR(skip_bloom_filter_on_read), + + MYSQL_SYSVAR(create_if_missing), + MYSQL_SYSVAR(two_write_queues), + MYSQL_SYSVAR(manual_wal_flush), + MYSQL_SYSVAR(write_policy), + MYSQL_SYSVAR(create_missing_column_families), + MYSQL_SYSVAR(error_if_exists), + MYSQL_SYSVAR(paranoid_checks), + MYSQL_SYSVAR(rate_limiter_bytes_per_sec), + MYSQL_SYSVAR(sst_mgr_rate_bytes_per_sec), + MYSQL_SYSVAR(delayed_write_rate), + MYSQL_SYSVAR(max_latest_deadlocks), + MYSQL_SYSVAR(info_log_level), + MYSQL_SYSVAR(max_open_files), + MYSQL_SYSVAR(max_total_wal_size), + MYSQL_SYSVAR(use_fsync), + MYSQL_SYSVAR(wal_dir), + MYSQL_SYSVAR(persistent_cache_path), + MYSQL_SYSVAR(persistent_cache_size_mb), + MYSQL_SYSVAR(delete_obsolete_files_period_micros), + MYSQL_SYSVAR(max_background_jobs), + MYSQL_SYSVAR(max_log_file_size), + MYSQL_SYSVAR(max_subcompactions), + MYSQL_SYSVAR(log_file_time_to_roll), + MYSQL_SYSVAR(keep_log_file_num), + MYSQL_SYSVAR(max_manifest_file_size), + MYSQL_SYSVAR(table_cache_numshardbits), + MYSQL_SYSVAR(wal_ttl_seconds), + MYSQL_SYSVAR(wal_size_limit_mb), + MYSQL_SYSVAR(manifest_preallocation_size), + MYSQL_SYSVAR(use_direct_reads), + 
MYSQL_SYSVAR(use_direct_io_for_flush_and_compaction), + MYSQL_SYSVAR(allow_mmap_reads), + MYSQL_SYSVAR(allow_mmap_writes), + MYSQL_SYSVAR(is_fd_close_on_exec), + MYSQL_SYSVAR(stats_dump_period_sec), + MYSQL_SYSVAR(advise_random_on_open), + MYSQL_SYSVAR(db_write_buffer_size), + MYSQL_SYSVAR(use_adaptive_mutex), + MYSQL_SYSVAR(bytes_per_sync), + MYSQL_SYSVAR(wal_bytes_per_sync), + MYSQL_SYSVAR(enable_thread_tracking), + MYSQL_SYSVAR(perf_context_level), + MYSQL_SYSVAR(wal_recovery_mode), + MYSQL_SYSVAR(stats_level), + MYSQL_SYSVAR(access_hint_on_compaction_start), + MYSQL_SYSVAR(new_table_reader_for_compaction_inputs), + MYSQL_SYSVAR(compaction_readahead_size), + MYSQL_SYSVAR(allow_concurrent_memtable_write), + MYSQL_SYSVAR(enable_write_thread_adaptive_yield), + + MYSQL_SYSVAR(block_cache_size), + MYSQL_SYSVAR(sim_cache_size), + MYSQL_SYSVAR(use_clock_cache), + MYSQL_SYSVAR(cache_high_pri_pool_ratio), + MYSQL_SYSVAR(cache_dump), + MYSQL_SYSVAR(cache_index_and_filter_blocks), + MYSQL_SYSVAR(cache_index_and_filter_with_high_priority), + MYSQL_SYSVAR(pin_l0_filter_and_index_blocks_in_cache), + MYSQL_SYSVAR(index_type), + MYSQL_SYSVAR(hash_index_allow_collision), + MYSQL_SYSVAR(no_block_cache), + MYSQL_SYSVAR(block_size), + MYSQL_SYSVAR(block_size_deviation), + MYSQL_SYSVAR(block_restart_interval), + MYSQL_SYSVAR(whole_key_filtering), + + MYSQL_SYSVAR(default_cf_options), + MYSQL_SYSVAR(override_cf_options), + MYSQL_SYSVAR(update_cf_options), + + MYSQL_SYSVAR(flush_log_at_trx_commit), + MYSQL_SYSVAR(write_disable_wal), + MYSQL_SYSVAR(write_ignore_missing_column_families), + + MYSQL_SYSVAR(skip_fill_cache), + MYSQL_SYSVAR(unsafe_for_binlog), + + MYSQL_SYSVAR(records_in_range), + MYSQL_SYSVAR(force_index_records_in_range), + MYSQL_SYSVAR(debug_optimizer_n_rows), + MYSQL_SYSVAR(force_compute_memtable_stats), + MYSQL_SYSVAR(force_compute_memtable_stats_cachetime), + MYSQL_SYSVAR(debug_optimizer_no_zero_cardinality), + + MYSQL_SYSVAR(compact_cf), + MYSQL_SYSVAR(delete_cf), + 
MYSQL_SYSVAR(signal_drop_index_thread), + MYSQL_SYSVAR(pause_background_work), + MYSQL_SYSVAR(enable_2pc), + MYSQL_SYSVAR(ignore_unknown_options), + MYSQL_SYSVAR(strict_collation_check), + MYSQL_SYSVAR(strict_collation_exceptions), + MYSQL_SYSVAR(collect_sst_properties), + MYSQL_SYSVAR(force_flush_memtable_now), + MYSQL_SYSVAR(force_flush_memtable_and_lzero_now), + MYSQL_SYSVAR(enable_ttl), + MYSQL_SYSVAR(enable_ttl_read_filtering), + MYSQL_SYSVAR(debug_ttl_rec_ts), + MYSQL_SYSVAR(debug_ttl_snapshot_ts), + MYSQL_SYSVAR(debug_ttl_read_filter_ts), + MYSQL_SYSVAR(debug_ttl_ignore_pk), + MYSQL_SYSVAR(reset_stats), + MYSQL_SYSVAR(io_write_timeout), + MYSQL_SYSVAR(seconds_between_stat_computes), + + MYSQL_SYSVAR(compaction_sequential_deletes), + MYSQL_SYSVAR(compaction_sequential_deletes_window), + MYSQL_SYSVAR(compaction_sequential_deletes_file_size), + MYSQL_SYSVAR(compaction_sequential_deletes_count_sd), + MYSQL_SYSVAR(print_snapshot_conflict_queries), + + MYSQL_SYSVAR(datadir), + MYSQL_SYSVAR(supported_compression_types), + MYSQL_SYSVAR(create_checkpoint), + MYSQL_SYSVAR(remove_mariabackup_checkpoint), + MYSQL_SYSVAR(checksums_pct), + MYSQL_SYSVAR(store_row_debug_checksums), + MYSQL_SYSVAR(verify_row_debug_checksums), + MYSQL_SYSVAR(master_skip_tx_api), + + MYSQL_SYSVAR(validate_tables), + MYSQL_SYSVAR(table_stats_sampling_pct), + + MYSQL_SYSVAR(large_prefix), + MYSQL_SYSVAR(allow_to_start_after_corruption), + MYSQL_SYSVAR(git_hash), + MYSQL_SYSVAR(error_on_suboptimal_collation), + MYSQL_SYSVAR(stats_recalc_rate), + MYSQL_SYSVAR(debug_manual_compaction_delay), + MYSQL_SYSVAR(max_manual_compactions), + MYSQL_SYSVAR(manual_compaction_threads), + MYSQL_SYSVAR(rollback_on_timeout), + + MYSQL_SYSVAR(enable_insert_with_update_caching), + nullptr}; + +static rocksdb::WriteOptions rdb_get_rocksdb_write_options( + my_core::THD *const thd) { + rocksdb::WriteOptions opt; + + opt.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC); + opt.disableWAL = THDVAR(thd, 
write_disable_wal); + opt.ignore_missing_column_families = + THDVAR(thd, write_ignore_missing_column_families); + + return opt; +} + +static int rocksdb_compact_column_family(THD *const thd, + struct st_mysql_sys_var *const var, + void *const var_ptr, + struct st_mysql_value *const value) { + char buff[STRING_BUFFER_USUAL_SIZE]; + int len = sizeof(buff); + + DBUG_ASSERT(value != nullptr); + + if (const char *const cf = value->val_str(value, buff, &len)) { + auto cfh = cf_manager.get_cf(cf); + if (cfh != nullptr && rdb != nullptr) { + int mc_id = rdb_mc_thread.request_manual_compaction( + cfh, nullptr, nullptr, THDVAR(thd, manual_compaction_threads)); + if (mc_id == -1) { + my_error(ER_INTERNAL_ERROR, MYF(0), + "Can't schedule more manual compactions. " + "Increase rocksdb_max_manual_compactions or stop issuing " + "more manual compactions."); + return HA_EXIT_FAILURE; + } else if (mc_id < 0) { + return HA_EXIT_FAILURE; + } + // NO_LINT_DEBUG + sql_print_information("RocksDB: Manual compaction of column family: %s\n", + cf); + // Check the thd state every short cycle (100ms). This allows + // exiting this function without waiting for CompactRange to finish. + do { + my_sleep(100000); + } while (!thd->killed && + !rdb_mc_thread.is_manual_compaction_finished(mc_id)); + + if (thd->killed) { + // This cancels if requested compaction state is INITED. + // TODO(yoshinorim): Cancel running compaction as well once + // it is supported in RocksDB. 
+ rdb_mc_thread.clear_manual_compaction_request(mc_id, true); + } + } + } + return HA_EXIT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////////////////// + +/* + Drop index thread's control +*/ + +static Rdb_drop_index_thread rdb_drop_idx_thread; + +static void rocksdb_drop_index_wakeup_thread( + my_core::THD *const thd MY_ATTRIBUTE((__unused__)), + struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), + void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) { + if (*static_cast(save)) { + rdb_drop_idx_thread.signal(); + } +} + +static inline uint32_t rocksdb_perf_context_level(THD *const thd) { + DBUG_ASSERT(thd != nullptr); + + const int session_perf_context_level = THDVAR(thd, perf_context_level); + if (session_perf_context_level > rocksdb::PerfLevel::kUninitialized) { + return session_perf_context_level; + } + + /* + Fallback to global thdvar, if session specific one was not set to a valid + value. + */ + + const int global_perf_context_level = THDVAR(nullptr, perf_context_level); + if (global_perf_context_level > rocksdb::PerfLevel::kUninitialized) { + return global_perf_context_level; + } + + return rocksdb::PerfLevel::kDisable; +} + +/* + Very short (functor-like) interface to be passed to + Rdb_transaction::walk_tx_list() +*/ + +interface Rdb_tx_list_walker { + virtual ~Rdb_tx_list_walker() {} + virtual void process_tran(const Rdb_transaction *const) = 0; +}; + +/* + This is a helper class that is passed to RocksDB to get notifications when + a snapshot gets created. 
+*/ + +class Rdb_snapshot_notifier : public rocksdb::TransactionNotifier { + Rdb_transaction *m_owning_tx; + + void SnapshotCreated(const rocksdb::Snapshot *snapshot) override; + + public: + Rdb_snapshot_notifier(const Rdb_snapshot_notifier &) = delete; + Rdb_snapshot_notifier &operator=(const Rdb_snapshot_notifier &) = delete; + + explicit Rdb_snapshot_notifier(Rdb_transaction *const owning_tx) + : m_owning_tx(owning_tx) {} + + // If the owning Rdb_transaction gets destructed we need to not reference + // it anymore. + void detach() { m_owning_tx = nullptr; } +}; + + +#ifdef MARIAROCKS_NOT_YET +// ER_LOCK_WAIT_TIMEOUT error also has a reason in facebook/mysql-5.6 +#endif +String timeout_message(const char *command, const char *name1, + const char *name2) +{ + String msg; + msg.append("Timeout on "); + msg.append(command); + msg.append(": "); + msg.append(name1); + if (name2 && name2[0]) + { + msg.append("."); + msg.append(name2); + } + return msg; +} + + +/* This is the base class for transactions when interacting with rocksdb. 
+ */ +class Rdb_transaction { + protected: + ulonglong m_write_count = 0; + ulonglong m_insert_count = 0; + ulonglong m_update_count = 0; + ulonglong m_delete_count = 0; + ulonglong m_lock_count = 0; + std::unordered_map m_auto_incr_map; + + bool m_is_delayed_snapshot = false; + bool m_is_two_phase = false; + + std::unordered_set modified_tables; + + private: + /* + Number of write operations this transaction had when we took the last + savepoint (the idea is not to take another savepoint if we haven't made + any changes) + */ + ulonglong m_writes_at_last_savepoint; + + protected: + +protected: + THD *m_thd = nullptr; + + static std::multiset s_tx_list; + static mysql_mutex_t s_tx_list_mutex; + + Rdb_io_perf *m_tbl_io_perf; + + bool m_tx_read_only = false; + + int m_timeout_sec; /* Cached value of @@rocksdb_lock_wait_timeout */ + + /* Maximum number of locks the transaction can have */ + ulonglong m_max_row_locks; + + bool m_is_tx_failed = false; + bool m_rollback_only = false; + + std::shared_ptr m_notifier; + + // This should be used only when updating binlog information. + virtual rocksdb::WriteBatchBase *get_write_batch() = 0; + virtual bool commit_no_binlog() = 0; + virtual rocksdb::Iterator *get_iterator( + const rocksdb::ReadOptions &options, + rocksdb::ColumnFamilyHandle *column_family) = 0; + +protected: + /* + The following two are helper functions to be overloaded by child classes. + They should provide RocksDB's savepoint semantics. + */ + virtual void do_set_savepoint() = 0; + virtual void do_rollback_to_savepoint() = 0; + + /* + @detail + This function takes in the WriteBatch of the transaction to add + all the AUTO_INCREMENT merges. It does so by iterating through + m_auto_incr_map and then constructing key/value pairs to call merge upon. 
+ + @param wb + */ + rocksdb::Status merge_auto_incr_map(rocksdb::WriteBatchBase *const wb) { + DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", return rocksdb::Status::OK();); + + // Iterate through the merge map merging all keys into data dictionary. + rocksdb::Status s; + for (auto &it : m_auto_incr_map) { + s = dict_manager.put_auto_incr_val(wb, it.first, it.second); + if (!s.ok()) { + return s; + } + } + m_auto_incr_map.clear(); + return s; + } + + public: + rocksdb::ReadOptions m_read_opts; + const char *m_mysql_log_file_name; + my_off_t m_mysql_log_offset; +#ifdef MARIAROCKS_NOT_YET + // TODO: MariaDB probably doesn't need these at all: + const char *m_mysql_gtid; + const char *m_mysql_max_gtid; +#endif + String m_detailed_error; + int64_t m_snapshot_timestamp = 0; + bool m_ddl_transaction; +#ifdef MARIAROCKS_NOT_YET + std::shared_ptr m_explicit_snapshot; +#endif + + /* + Tracks the number of tables in use through external_lock. + This should not be reset during start_tx(). + */ + int64_t m_n_mysql_tables_in_use = 0; + + /* + MariaDB's group commit: + */ + bool commit_ordered_done; + bool commit_ordered_res; + + /* + for distinction between rdb_transaction_impl and rdb_writebatch_impl + when using walk tx list + */ + virtual bool is_writebatch_trx() const = 0; + + static void init_mutex() { + mysql_mutex_init(key_mutex_tx_list, &s_tx_list_mutex, MY_MUTEX_INIT_FAST); + } + + static void term_mutex() { + DBUG_ASSERT(s_tx_list.size() == 0); + mysql_mutex_destroy(&s_tx_list_mutex); + } + + static void walk_tx_list(Rdb_tx_list_walker *walker) { + DBUG_ASSERT(walker != nullptr); + + RDB_MUTEX_LOCK_CHECK(s_tx_list_mutex); + + for (auto it : s_tx_list) { + walker->process_tran(it); + } + + RDB_MUTEX_UNLOCK_CHECK(s_tx_list_mutex); + } + + int set_status_error(THD *const thd, const rocksdb::Status &s, + const Rdb_key_def &kd, Rdb_tbl_def *const tbl_def, + Rdb_table_handler *const table_handler) { + DBUG_ASSERT(!s.ok()); + DBUG_ASSERT(tbl_def != nullptr); + + if 
(s.IsTimedOut()) { + /* + SQL layer has weird expectations. If we return an error when + doing a read in DELETE IGNORE, it will ignore the error ("because it's + an IGNORE command!") but then will fail an assert, because "error code + was returned, but no error happened". Do what InnoDB's + convert_error_code_to_mysql() does: force a rollback (statement, or whole + transaction when rocksdb_rollback_on_timeout is set) before returning the error: + */ + my_core::thd_mark_transaction_to_rollback( + thd, static_cast(rocksdb_rollback_on_timeout)); + m_detailed_error.copy(timeout_message( + "index", tbl_def->full_tablename().c_str(), kd.get_name().c_str())); + table_handler->m_lock_wait_timeout_counter.inc(); + rocksdb_row_lock_wait_timeouts++; + + return HA_ERR_LOCK_WAIT_TIMEOUT; + } + + if (s.IsDeadlock()) { + my_core::thd_mark_transaction_to_rollback(thd, + true /* whole transaction */); + m_detailed_error = String(); + table_handler->m_deadlock_counter.inc(); + rocksdb_row_lock_deadlocks++; + return HA_ERR_LOCK_DEADLOCK; + } else if (s.IsBusy()) { + rocksdb_snapshot_conflict_errors++; + if (rocksdb_print_snapshot_conflict_queries) { + char user_host_buff[MAX_USER_HOST_SIZE + 1]; + make_user_name(thd, user_host_buff); + // NO_LINT_DEBUG + sql_print_warning( + "Got snapshot conflict errors: User: %s " + "Query: %s", + user_host_buff, thd->query()); + } + m_detailed_error = String(" (snapshot conflict)", system_charset_info); + table_handler->m_deadlock_counter.inc(); + return HA_ERR_ROCKSDB_STATUS_BUSY; + } + + if (s.IsIOError() || s.IsCorruption()) { + rdb_handle_io_error(s, RDB_IO_ERROR_GENERAL); + } + + return ha_rocksdb::rdb_error_to_mysql(s); + } + + THD *get_thd() const { return m_thd; } + + /* Used for tracking io_perf counters */ + void io_perf_start(Rdb_io_perf *const io_perf) { + /* + Since perf_context is tracked per thread, it is difficult and expensive + to maintain perf_context on a per table basis. Therefore, roll all + perf_context data into the first table used in a query. 
This works well + for single table queries and is probably good enough for queries that hit + multiple tables. + + perf_context stats gathering is started when the table lock is acquired + or when ha_rocksdb::start_stmt is called in case of LOCK TABLES. They + are recorded when the table lock is released, or when commit/rollback + is called on the transaction, whichever comes first. Table lock release + and commit/rollback can happen in different orders. In the case where + the lock is released before commit/rollback is called, an extra step to + gather stats during commit/rollback is needed. + */ + if (m_tbl_io_perf == nullptr && + io_perf->start(rocksdb_perf_context_level(m_thd))) { + m_tbl_io_perf = io_perf; + } + } + + void io_perf_end_and_record(void) { + if (m_tbl_io_perf != nullptr) { + m_tbl_io_perf->end_and_record(rocksdb_perf_context_level(m_thd)); + m_tbl_io_perf = nullptr; + } + } + + void io_perf_end_and_record(Rdb_io_perf *const io_perf) { + if (m_tbl_io_perf == io_perf) { + io_perf_end_and_record(); + } + } + + void update_bytes_written(ulonglong bytes_written) { + if (m_tbl_io_perf != nullptr) { + m_tbl_io_perf->update_bytes_written(rocksdb_perf_context_level(m_thd), + bytes_written); + } + } + + void set_params(int timeout_sec_arg, int max_row_locks_arg) { + m_timeout_sec = timeout_sec_arg; + m_max_row_locks = max_row_locks_arg; + set_lock_timeout(timeout_sec_arg); + } + + virtual void set_lock_timeout(int timeout_sec_arg) = 0; + + ulonglong get_write_count() const { return m_write_count; } + + ulonglong get_insert_count() const { return m_insert_count; } + + ulonglong get_update_count() const { return m_update_count; } + + ulonglong get_delete_count() const { return m_delete_count; } + + void incr_insert_count() { ++m_insert_count; } + + void incr_update_count() { ++m_update_count; } + + void incr_delete_count() { ++m_delete_count; } + + int get_timeout_sec() const { return m_timeout_sec; } + + ulonglong get_lock_count() const { return 
m_lock_count; } + + virtual void set_sync(bool sync) = 0; + + virtual void release_lock(rocksdb::ColumnFamilyHandle *const column_family, + const std::string &rowkey) = 0; + + virtual bool prepare(const rocksdb::TransactionName &name) = 0; + + bool commit_or_rollback() { + bool res; + if (m_is_tx_failed) { + rollback(); + res = false; + } else { + res = commit(); + } + return res; + } + + bool commit() { + if (get_write_count() == 0) { + rollback(); + return false; + } else if (m_rollback_only) { + /* + Transactions marked as rollback_only are expected to be rolled back at + prepare(). But there are some exceptions like below that prepare() is + never called and commit() is called instead. + 1. Binlog is disabled + 2. No modification exists in binlog cache for the transaction (#195) + In both cases, rolling back transaction is safe. Nothing is written to + binlog. + */ + my_error(ER_ROLLBACK_ONLY, MYF(0)); + rollback(); + return true; + } else { +#ifdef MARIAROCKS_NOT_YET + /* + Storing binlog position inside MyRocks is needed only for restoring + MyRocks from backups. This feature is not supported yet. + */ + mysql_bin_log_commit_pos(m_thd, &m_mysql_log_offset, + &m_mysql_log_file_name); + binlog_manager.update(m_mysql_log_file_name, m_mysql_log_offset, + get_write_batch()); +#endif + return commit_no_binlog(); + } + } + + virtual void rollback() = 0; + + void snapshot_created(const rocksdb::Snapshot *const snapshot) { + DBUG_ASSERT(snapshot != nullptr); + + m_read_opts.snapshot = snapshot; + rdb->GetEnv()->GetCurrentTime(&m_snapshot_timestamp); + m_is_delayed_snapshot = false; + } + + virtual void acquire_snapshot(bool acquire_now) = 0; + virtual void release_snapshot() = 0; + + bool has_snapshot() const { return m_read_opts.snapshot != nullptr; } + + private: + // The Rdb_sst_info structures we are currently loading. 
In a partitioned + // table this can have more than one entry + std::vector> m_curr_bulk_load; + std::string m_curr_bulk_load_tablename; + + /* External merge sorts for bulk load: key ID -> merge sort instance */ + std::unordered_map m_key_merge; + + public: + int get_key_merge(GL_INDEX_ID kd_gl_id, rocksdb::ColumnFamilyHandle *cf, + Rdb_index_merge **key_merge) { + int res; + auto it = m_key_merge.find(kd_gl_id); + if (it == m_key_merge.end()) { + m_key_merge.emplace( + std::piecewise_construct, std::make_tuple(kd_gl_id), + std::make_tuple( + get_rocksdb_tmpdir(), THDVAR(get_thd(), merge_buf_size), + THDVAR(get_thd(), merge_combine_read_size), + THDVAR(get_thd(), merge_tmp_file_removal_delay_ms), cf)); + it = m_key_merge.find(kd_gl_id); + if ((res = it->second.init()) != 0) { + return res; + } + } + *key_merge = &it->second; + return HA_EXIT_SUCCESS; + } + + /* Finish bulk loading for all table handlers belongs to one connection */ + int finish_bulk_load(bool *is_critical_error = nullptr, + int print_client_error = true) { + Ensure_cleanup cleanup([&]() { + // Always clear everything regardless of success/failure + m_curr_bulk_load.clear(); + m_curr_bulk_load_tablename.clear(); + m_key_merge.clear(); + }); + + int rc = 0; + if (is_critical_error) { + *is_critical_error = true; + } + + // PREPARE phase: finish all on-going bulk loading Rdb_sst_info and + // collect all Rdb_sst_commit_info containing (SST files, cf) + int rc2 = 0; + std::vector sst_commit_list; + sst_commit_list.reserve(m_curr_bulk_load.size()); + + for (auto &sst_info : m_curr_bulk_load) { + Rdb_sst_info::Rdb_sst_commit_info commit_info; + + // Commit the list of SST files and move it to the end of + // sst_commit_list, effectively transfer the ownership over + rc2 = sst_info->finish(&commit_info, print_client_error); + if (rc2 && rc == 0) { + // Don't return yet - make sure we finish all the SST infos + rc = rc2; + } + + // Make sure we have work to do - we might be losing the race + if (rc2 == 0 
&& commit_info.has_work()) { + sst_commit_list.emplace_back(std::move(commit_info)); + DBUG_ASSERT(!commit_info.has_work()); + } + } + + if (rc) { + return rc; + } + + // MERGING Phase: Flush the index_merge sort buffers into SST files in + // Rdb_sst_info and collect all Rdb_sst_commit_info containing + // (SST files, cf) + if (!m_key_merge.empty()) { + Ensure_cleanup malloc_cleanup([]() { + /* + Explicitly tell jemalloc to clean up any unused dirty pages at this + point. + See https://reviews.facebook.net/D63723 for more details. + */ + purge_all_jemalloc_arenas(); + }); + + rocksdb::Slice merge_key; + rocksdb::Slice merge_val; + for (auto it = m_key_merge.begin(); it != m_key_merge.end(); it++) { + GL_INDEX_ID index_id = it->first; + std::shared_ptr keydef = + ddl_manager.safe_find(index_id); + std::string table_name = ddl_manager.safe_get_table_name(index_id); + + // Unable to find key definition or table name since the + // table could have been dropped. + // TODO(herman): there is a race here between dropping the table + // and detecting a drop here. If the table is dropped while bulk + // loading is finishing, these keys being added here may + // be missed by the compaction filter and not be marked for + // removal. It is unclear how to lock the sql table from the storage + // engine to prevent modifications to it while bulk load is occurring. + if (keydef == nullptr) { + if (is_critical_error) { + // We used to set the error but simply ignores it. This follows + // current behavior and we should revisit this later + *is_critical_error = false; + } + return HA_ERR_KEY_NOT_FOUND; + } else if (table_name.empty()) { + if (is_critical_error) { + // We used to set the error but simply ignores it. 
This follows + // current behavior and we should revisit this later + *is_critical_error = false; + } + return HA_ERR_NO_SUCH_TABLE; + } + const std::string &index_name = keydef->get_name(); + Rdb_index_merge &rdb_merge = it->second; + + // Rdb_sst_info expects a denormalized table name in the form of + // "./database/table" + std::replace(table_name.begin(), table_name.end(), '.', '/'); + table_name = "./" + table_name; + auto sst_info = std::make_shared( + rdb, table_name, index_name, rdb_merge.get_cf(), + *rocksdb_db_options, THDVAR(get_thd(), trace_sst_api)); + + while ((rc2 = rdb_merge.next(&merge_key, &merge_val)) == 0) { + if ((rc2 = sst_info->put(merge_key, merge_val)) != 0) { + rc = rc2; + + // Don't return yet - make sure we finish the sst_info + break; + } + } + + // -1 => no more items + if (rc2 != -1 && rc != 0) { + rc = rc2; + } + + Rdb_sst_info::Rdb_sst_commit_info commit_info; + rc2 = sst_info->finish(&commit_info, print_client_error); + if (rc2 != 0 && rc == 0) { + // Only set the error from sst_info->finish if finish failed and we + // didn't fail before. 
In other words, we don't have finish's + // success mask earlier failures + rc = rc2; + } + + if (rc) { + return rc; + } + + if (commit_info.has_work()) { + sst_commit_list.emplace_back(std::move(commit_info)); + DBUG_ASSERT(!commit_info.has_work()); + } + } + } + + // Early return in case we lost the race completely and end up with no + // work at all + if (sst_commit_list.size() == 0) { + return rc; + } + + // INGEST phase: Group all Rdb_sst_commit_info by cf (as they might + // have the same cf across different indexes) and call out to RocksDB + // to ingest all SST files in one atomic operation + rocksdb::IngestExternalFileOptions options; + options.move_files = true; + options.snapshot_consistency = false; + options.allow_global_seqno = false; + options.allow_blocking_flush = false; + + std::map + arg_map; + + // Group by column_family + for (auto &commit_info : sst_commit_list) { + if (arg_map.find(commit_info.get_cf()) == arg_map.end()) { + rocksdb::IngestExternalFileArg arg; + arg.column_family = commit_info.get_cf(), + arg.external_files = commit_info.get_committed_files(), + arg.options = options; + + arg_map.emplace(commit_info.get_cf(), arg); + } else { + auto &files = arg_map[commit_info.get_cf()].external_files; + files.insert(files.end(), commit_info.get_committed_files().begin(), + commit_info.get_committed_files().end()); + } + } + + std::vector args; + size_t file_count = 0; + for (auto &cf_files_pair : arg_map) { + args.push_back(cf_files_pair.second); + file_count += cf_files_pair.second.external_files.size(); + } + + const rocksdb::Status s = rdb->IngestExternalFiles(args); + if (THDVAR(m_thd, trace_sst_api)) { + // NO_LINT_DEBUG + sql_print_information( + "SST Tracing: IngestExternalFile '%zu' files returned %s", file_count, + s.ok() ? "ok" : "not ok"); + } + + if (!s.ok()) { + if (print_client_error) { + Rdb_sst_info::report_error_msg(s, nullptr); + } + return HA_ERR_ROCKSDB_BULK_LOAD; + } + + // COMMIT phase: mark everything as completed. 
This avoids SST file + // deletion kicking in. Otherwise SST files would get deleted if this + // entire operation is aborted + for (auto &commit_info : sst_commit_list) { + commit_info.commit(); + } + + return rc; + } + + int start_bulk_load(ha_rocksdb *const bulk_load, + std::shared_ptr sst_info) { + /* + If we already have an open bulk load of a table and the name doesn't + match the current one, close out the currently running one. This allows + multiple bulk loads to occur on a partitioned table, but then closes + them all out when we switch to another table. + */ + DBUG_ASSERT(bulk_load != nullptr); + + if (!m_curr_bulk_load.empty() && + bulk_load->get_table_basename() != m_curr_bulk_load_tablename) { + const auto res = finish_bulk_load(); + if (res != HA_EXIT_SUCCESS) { + return res; + } + } + + /* + This used to track ha_rocksdb handler objects, but those can be + freed by the table cache while this was referencing them. Instead + of tracking ha_rocksdb handler objects, this now tracks the + Rdb_sst_info allocated, and both the ha_rocksdb handler and the + Rdb_transaction both have shared pointers to them. + + On transaction complete, it will commit each Rdb_sst_info structure found. + If the ha_rocksdb object is freed, etc., it will also commit + the Rdb_sst_info. The Rdb_sst_info commit path needs to be idempotent. + */ + m_curr_bulk_load.push_back(sst_info); + m_curr_bulk_load_tablename = bulk_load->get_table_basename(); + return HA_EXIT_SUCCESS; + } + + int num_ongoing_bulk_load() const { return m_curr_bulk_load.size(); } + + const char *get_rocksdb_tmpdir() const { + const char *tmp_dir = THDVAR(get_thd(), tmpdir); + + /* + We want to treat an empty string as nullptr, in these cases DDL operations + will use the default --tmpdir passed to mysql instead. + */ + if (tmp_dir != nullptr && *tmp_dir == '\0') { + tmp_dir = nullptr; + } + return (tmp_dir); + } + + /* + Flush the data accumulated so far. This assumes we're doing a bulk insert. 
+ + @detail + This should work like transaction commit, except that we don't + synchronize with the binlog (there is no API that would allow to have + binlog flush the changes accumulated so far and return its current + position) + + @todo + Add test coverage for what happens when somebody attempts to do bulk + inserts while inside a multi-statement transaction. + */ + bool flush_batch() { + if (get_write_count() == 0) return false; + + /* Commit the current transaction */ + if (commit_no_binlog()) return true; + + /* Start another one */ + start_tx(); + return false; + } + + void set_auto_incr(const GL_INDEX_ID &gl_index_id, ulonglong curr_id) { + m_auto_incr_map[gl_index_id] = + std::max(m_auto_incr_map[gl_index_id], curr_id); + } + +#ifndef DBUG_OFF + ulonglong get_auto_incr(const GL_INDEX_ID &gl_index_id) { + if (m_auto_incr_map.count(gl_index_id) > 0) { + return m_auto_incr_map[gl_index_id]; + } + return 0; + } +#endif + + virtual rocksdb::Status put(rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, + const rocksdb::Slice &value, + const bool assume_tracked) = 0; + virtual rocksdb::Status delete_key( + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, const bool assume_tracked) = 0; + virtual rocksdb::Status single_delete( + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, const bool assume_tracked) = 0; + + virtual bool has_modifications() const = 0; + + virtual rocksdb::WriteBatchBase *get_indexed_write_batch() = 0; + /* + Return a WriteBatch that one can write to. The writes will skip any + transaction locking. The writes will NOT be visible to the transaction. 
+ */ + rocksdb::WriteBatchBase *get_blind_write_batch() { + return get_indexed_write_batch()->GetWriteBatch(); + } + + virtual rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, + rocksdb::PinnableSlice *const value) const = 0; + virtual rocksdb::Status get_for_update( + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, rocksdb::PinnableSlice *const value, + bool exclusive, const bool do_validate) = 0; + + rocksdb::Iterator *get_iterator( + rocksdb::ColumnFamilyHandle *const column_family, bool skip_bloom_filter, + bool fill_cache, const rocksdb::Slice &eq_cond_lower_bound, + const rocksdb::Slice &eq_cond_upper_bound, bool read_current = false, + bool create_snapshot = true) { + // Make sure we are not doing both read_current (which implies we don't + // want a snapshot) and create_snapshot which makes sure we create + // a snapshot + DBUG_ASSERT(column_family != nullptr); + DBUG_ASSERT(!read_current || !create_snapshot); + + if (create_snapshot) acquire_snapshot(true); + + rocksdb::ReadOptions options = m_read_opts; + + if (skip_bloom_filter) { + options.total_order_seek = true; + options.iterate_lower_bound = &eq_cond_lower_bound; + options.iterate_upper_bound = &eq_cond_upper_bound; + } else { + // With this option, Iterator::Valid() returns false if key + // is outside of the prefix bloom filter range set at Seek(). + // Must not be set to true if not using bloom filter. + options.prefix_same_as_start = true; + } + options.fill_cache = fill_cache; + if (read_current) { + options.snapshot = nullptr; + } + return get_iterator(options, column_family); + } + + virtual bool is_tx_started() const = 0; + virtual void start_tx() = 0; + virtual void start_stmt() = 0; + + protected: + // Non-virtual functions with actions to be done on transaction start and + // commit. 
+ void on_commit() { + time_t tm; + tm = time(nullptr); + for (auto &it : modified_tables) { + it->m_update_time = tm; + } + modified_tables.clear(); + } + void on_rollback() { + modified_tables.clear(); + } + public: + // Inform the transaction that this table was modified + void log_table_write_op(Rdb_tbl_def *tbl) { + modified_tables.insert(tbl); + } + + void set_initial_savepoint() { + /* + Set the initial savepoint. If the first statement in the transaction + fails, we need something to roll back to, without rolling back the + entire transaction. + */ + do_set_savepoint(); + m_writes_at_last_savepoint = m_write_count; + } + + /* + Called when a "top-level" statement inside a transaction completes + successfully and its changes become part of the transaction's changes. + */ + int make_stmt_savepoint_permanent() { + // Take another RocksDB savepoint only if we had changes since the last + // one. This is very important for long transactions doing lots of + // SELECTs. + if (m_writes_at_last_savepoint != m_write_count) { + rocksdb::WriteBatchBase *batch = get_write_batch(); + rocksdb::Status status = rocksdb::Status::NotFound(); + while ((status = batch->PopSavePoint()) == rocksdb::Status::OK()) { + } + + if (status != rocksdb::Status::NotFound()) { + return HA_EXIT_FAILURE; + } + + do_set_savepoint(); + m_writes_at_last_savepoint = m_write_count; + } + + return HA_EXIT_SUCCESS; + } + + /* + Rollback to the savepoint we've set before the last statement + */ + void rollback_to_stmt_savepoint() { + if (m_writes_at_last_savepoint != m_write_count) { + do_rollback_to_savepoint(); + /* + RollbackToSavePoint "removes the most recent SetSavePoint()", so + we need to set it again so that next statement can roll back to this + stage. + It's ok to do it here at statement end (instead of doing it at next + statement start) because setting a savepoint is cheap. 
+ */ + do_set_savepoint(); + m_writes_at_last_savepoint = m_write_count; + } + } + + virtual void rollback_stmt() = 0; + + void set_tx_failed(bool failed_arg) { m_is_tx_failed = failed_arg; } + + bool can_prepare() const { + if (m_rollback_only) { + my_error(ER_ROLLBACK_ONLY, MYF(0)); + return false; + } + return true; + } + + int rollback_to_savepoint(void *const savepoint) { + if (has_modifications()) { + my_error(ER_ROLLBACK_TO_SAVEPOINT, MYF(0)); + m_rollback_only = true; + return HA_EXIT_FAILURE; + } + return HA_EXIT_SUCCESS; + } + + /* + This is used by transactions started with "START TRANSACTION WITH " + "CONSISTENT [ROCKSDB] SNAPSHOT". When tx_read_only is turned on, + snapshot has to be created via DB::GetSnapshot(), not via Transaction + API. + */ + bool is_tx_read_only() const { return m_tx_read_only; } + + bool is_two_phase() const { return m_is_two_phase; } + + void set_tx_read_only(bool val) { m_tx_read_only = val; } + + explicit Rdb_transaction(THD *const thd) + : m_thd(thd), m_tbl_io_perf(nullptr) { + RDB_MUTEX_LOCK_CHECK(s_tx_list_mutex); + s_tx_list.insert(this); + RDB_MUTEX_UNLOCK_CHECK(s_tx_list_mutex); + } + + virtual ~Rdb_transaction() { + RDB_MUTEX_LOCK_CHECK(s_tx_list_mutex); + s_tx_list.erase(this); + RDB_MUTEX_UNLOCK_CHECK(s_tx_list_mutex); + } + virtual bool is_prepared() { return false; }; + virtual void detach_prepared_tx() {}; +}; + +/* + This is a rocksdb transaction. Its members represent the current transaction, + which consists of: + - the snapshot + - the changes we've made but are not seeing yet. + + The changes are made to individual tables, which store them here and then + this object commits them on commit. 
+*/ +class Rdb_transaction_impl : public Rdb_transaction { + rocksdb::Transaction *m_rocksdb_tx = nullptr; + rocksdb::Transaction *m_rocksdb_reuse_tx = nullptr; + + public: + void set_lock_timeout(int timeout_sec_arg) override { + if (m_rocksdb_tx) { + m_rocksdb_tx->SetLockTimeout(rdb_convert_sec_to_ms(m_timeout_sec)); + } + } + + void set_sync(bool sync) override { + if (m_rocksdb_tx) + m_rocksdb_tx->GetWriteOptions()->sync = sync; + } + + void release_lock(rocksdb::ColumnFamilyHandle *const column_family, + const std::string &rowkey) override { + if (!THDVAR(m_thd, lock_scanned_rows)) { + m_rocksdb_tx->UndoGetForUpdate(column_family, rocksdb::Slice(rowkey)); + } + } + + virtual bool is_writebatch_trx() const override { return false; } + + bool is_prepared() override { + return m_rocksdb_tx && rocksdb::Transaction::PREPARED == m_rocksdb_tx->GetState(); + } + + void detach_prepared_tx() override { + DBUG_ASSERT(rocksdb::Transaction::PREPARED == m_rocksdb_tx->GetState()); + m_rocksdb_tx = nullptr; + } + +private: + void release_tx(void) { + // We are done with the current active transaction object. Preserve it + // for later reuse. 
+ DBUG_ASSERT(m_rocksdb_reuse_tx == nullptr); + m_rocksdb_reuse_tx = m_rocksdb_tx; + m_rocksdb_tx = nullptr; + } + + bool prepare(const rocksdb::TransactionName &name) override { + rocksdb::Status s; + s = m_rocksdb_tx->SetName(name); + if (!s.ok()) { + rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); + return false; + } + + s = merge_auto_incr_map(m_rocksdb_tx->GetWriteBatch()->GetWriteBatch()); + if (!s.ok()) { + rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); + return false; + } + + s = m_rocksdb_tx->Prepare(); + if (!s.ok()) { + rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); + return false; + } + return true; + } + + bool commit_no_binlog() override { + bool res = false; + rocksdb::Status s; + + s = merge_auto_incr_map(m_rocksdb_tx->GetWriteBatch()->GetWriteBatch()); + if (!s.ok()) { + rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); + res = true; + goto error; + } + + release_snapshot(); + s = m_rocksdb_tx->Commit(); + if (!s.ok()) { + rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); + res = true; + goto error; + } + + on_commit(); + error: + on_rollback(); + /* Save the transaction object to be reused */ + release_tx(); + + m_write_count = 0; + m_insert_count = 0; + m_update_count = 0; + m_delete_count = 0; + m_lock_count = 0; + set_tx_read_only(false); + m_rollback_only = false; + return res; + } + + public: + void rollback() override { + on_rollback(); + m_write_count = 0; + m_insert_count = 0; + m_update_count = 0; + m_delete_count = 0; + m_lock_count = 0; + m_auto_incr_map.clear(); + m_ddl_transaction = false; + if (m_rocksdb_tx) { + release_snapshot(); + /* This will also release all of the locks: */ + m_rocksdb_tx->Rollback(); + + /* Save the transaction object to be reused */ + release_tx(); + + set_tx_read_only(false); + m_rollback_only = false; + } + } + + void acquire_snapshot(bool acquire_now) override { + if (m_read_opts.snapshot == nullptr) { +#ifdef MARIAROCKS_NOT_YET + const auto thd_ss = std::static_pointer_cast( + 
m_thd->get_explicit_snapshot()); + if (thd_ss) { + m_explicit_snapshot = thd_ss; + } + if (m_explicit_snapshot) { + auto snapshot = m_explicit_snapshot->get_snapshot()->snapshot(); + snapshot_created(snapshot); + } else +#endif + if (is_tx_read_only()) { + snapshot_created(rdb->GetSnapshot()); + } else if (acquire_now) { + m_rocksdb_tx->SetSnapshot(); + snapshot_created(m_rocksdb_tx->GetSnapshot()); + } else if (!m_is_delayed_snapshot) { + m_rocksdb_tx->SetSnapshotOnNextOperation(m_notifier); + m_is_delayed_snapshot = true; + } + } + } + + void release_snapshot() override { + bool need_clear = m_is_delayed_snapshot; + + if (m_read_opts.snapshot != nullptr) { + m_snapshot_timestamp = 0; +#ifdef MARIAROCKS_NOT_YET + if (m_explicit_snapshot) { + m_explicit_snapshot.reset(); + need_clear = false; + } else +#endif + if (is_tx_read_only()) { + rdb->ReleaseSnapshot(m_read_opts.snapshot); + need_clear = false; + } else { + need_clear = true; + } + m_read_opts.snapshot = nullptr; + } + + if (need_clear && m_rocksdb_tx != nullptr) m_rocksdb_tx->ClearSnapshot(); + } + + bool has_snapshot() { return m_read_opts.snapshot != nullptr; } + + rocksdb::Status put(rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, const rocksdb::Slice &value, + const bool assume_tracked) override { + ++m_write_count; + ++m_lock_count; + if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) { + return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit); + } + return m_rocksdb_tx->Put(column_family, key, value, assume_tracked); + } + + rocksdb::Status delete_key(rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, + const bool assume_tracked) override { + ++m_write_count; + ++m_lock_count; + if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) { + return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit); + } + return m_rocksdb_tx->Delete(column_family, key, assume_tracked); + } + + rocksdb::Status 
single_delete( + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, const bool assume_tracked) override { + ++m_write_count; + ++m_lock_count; + if (m_write_count > m_max_row_locks || m_lock_count > m_max_row_locks) { + return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit); + } + return m_rocksdb_tx->SingleDelete(column_family, key, assume_tracked); + } + + bool has_modifications() const override { + return m_rocksdb_tx->GetWriteBatch() && + m_rocksdb_tx->GetWriteBatch()->GetWriteBatch() && + m_rocksdb_tx->GetWriteBatch()->GetWriteBatch()->Count() > 0; + } + + rocksdb::WriteBatchBase *get_write_batch() override { + if (is_two_phase()) { + return m_rocksdb_tx->GetCommitTimeWriteBatch(); + } + return m_rocksdb_tx->GetWriteBatch()->GetWriteBatch(); + } + + /* + Return a WriteBatch that one can write to. The writes will skip any + transaction locking. The writes WILL be visible to the transaction. + */ + rocksdb::WriteBatchBase *get_indexed_write_batch() override { + ++m_write_count; + return m_rocksdb_tx->GetWriteBatch(); + } + + rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, + rocksdb::PinnableSlice *const value) const override { + // clean PinnableSlice right begfore Get() for multiple gets per statement + // the resources after the last Get in a statement are cleared in + // handler::reset call + value->Reset(); + global_stats.queries[QUERIES_POINT].inc(); + return m_rocksdb_tx->Get(m_read_opts, column_family, key, value); + } + + rocksdb::Status get_for_update( + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, rocksdb::PinnableSlice *const value, + bool exclusive, const bool do_validate) override { + if (++m_lock_count > m_max_row_locks) { + return rocksdb::Status::Aborted(rocksdb::Status::kLockLimit); + } + + if (value != nullptr) { + value->Reset(); + } + rocksdb::Status s; + // If snapshot is null, pass it to GetForUpdate and snapshot is + // 
initialized there. Snapshot validation is skipped in that case. + if (m_read_opts.snapshot == nullptr || do_validate) { + s = m_rocksdb_tx->GetForUpdate( + m_read_opts, column_family, key, value, exclusive, + m_read_opts.snapshot ? do_validate : false); + } else { + // If snapshot is set, and if skipping validation, + // call GetForUpdate without validation and set back old snapshot + auto saved_snapshot = m_read_opts.snapshot; + m_read_opts.snapshot = nullptr; + s = m_rocksdb_tx->GetForUpdate(m_read_opts, column_family, key, value, + exclusive, false); + m_read_opts.snapshot = saved_snapshot; + } + return s; + } + + rocksdb::Iterator *get_iterator( + const rocksdb::ReadOptions &options, + rocksdb::ColumnFamilyHandle *const column_family) override { + global_stats.queries[QUERIES_RANGE].inc(); + return m_rocksdb_tx->GetIterator(options, column_family); + } + + const rocksdb::Transaction *get_rdb_trx() const { return m_rocksdb_tx; } + + bool is_tx_started() const override { return (m_rocksdb_tx != nullptr); } + + void start_tx() override { + rocksdb::TransactionOptions tx_opts; + rocksdb::WriteOptions write_opts; + tx_opts.set_snapshot = false; + tx_opts.lock_timeout = rdb_convert_sec_to_ms(m_timeout_sec); + tx_opts.deadlock_detect = THDVAR(m_thd, deadlock_detect); + tx_opts.deadlock_detect_depth = THDVAR(m_thd, deadlock_detect_depth); + // If this variable is set, this will write commit time write batch + // information on recovery or memtable flush. 
+ tx_opts.use_only_the_last_commit_time_batch_for_recovery = + THDVAR(m_thd, commit_time_batch_for_recovery); + tx_opts.max_write_batch_size = THDVAR(m_thd, write_batch_max_bytes); + + write_opts.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC); + write_opts.disableWAL = THDVAR(m_thd, write_disable_wal); + write_opts.ignore_missing_column_families = + THDVAR(m_thd, write_ignore_missing_column_families); + m_is_two_phase = rocksdb_enable_2pc; + + commit_ordered_done= false; + + /* + If m_rocksdb_reuse_tx is null this will create a new transaction object. + Otherwise it will reuse the existing one. + */ + m_rocksdb_tx = + rdb->BeginTransaction(write_opts, tx_opts, m_rocksdb_reuse_tx); + m_rocksdb_reuse_tx = nullptr; + + m_read_opts = rocksdb::ReadOptions(); + + set_initial_savepoint(); + + m_ddl_transaction = false; + } + + /* Implementations of do_*savepoint based on rocksdB::Transaction savepoints + */ + void do_set_savepoint() override { m_rocksdb_tx->SetSavePoint(); } + + void do_rollback_to_savepoint() override { + m_rocksdb_tx->RollbackToSavePoint(); + } + + /* + Start a statement inside a multi-statement transaction. + + @todo: are we sure this is called once (and not several times) per + statement start? + + For hooking to start of statement that is its own transaction, see + ha_rocksdb::external_lock(). 
+ */ + void start_stmt() override { + // Set the snapshot to delayed acquisition (SetSnapshotOnNextOperation) + acquire_snapshot(false); + } + + /* + This must be called when last statement is rolled back, but the transaction + continues + */ + void rollback_stmt() override { + /* TODO: here we must release the locks taken since the start_stmt() call */ + if (m_rocksdb_tx) { + const rocksdb::Snapshot *const org_snapshot = m_rocksdb_tx->GetSnapshot(); + rollback_to_stmt_savepoint(); + + const rocksdb::Snapshot *const cur_snapshot = m_rocksdb_tx->GetSnapshot(); + if (org_snapshot != cur_snapshot) { + if (org_snapshot != nullptr) m_snapshot_timestamp = 0; + + m_read_opts.snapshot = cur_snapshot; + if (cur_snapshot != nullptr) { + rdb->GetEnv()->GetCurrentTime(&m_snapshot_timestamp); + } else { + m_is_delayed_snapshot = true; + } + } + } + } + + explicit Rdb_transaction_impl(THD *const thd) + : Rdb_transaction(thd), m_rocksdb_tx(nullptr) { + // Create a notifier that can be called when a snapshot gets generated. + m_notifier = std::make_shared(this); + } + + virtual ~Rdb_transaction_impl() override { + rollback(); + + // Theoretically the notifier could outlive the Rdb_transaction_impl + // (because of the shared_ptr), so let it know it can't reference + // the transaction anymore. + m_notifier->detach(); + + // Free any transaction memory that is still hanging around. + delete m_rocksdb_reuse_tx; + DBUG_ASSERT(m_rocksdb_tx == nullptr); + } +}; + +/* This is a rocksdb write batch. This class doesn't hold or wait on any + transaction locks (skips rocksdb transaction API) thus giving better + performance. + + Currently this is only used for replication threads which are guaranteed + to be non-conflicting. Any further usage of this class should completely + be thought thoroughly. +*/ +class Rdb_writebatch_impl : public Rdb_transaction { + rocksdb::WriteBatchWithIndex *m_batch; + rocksdb::WriteOptions write_opts; + // Called after commit/rollback. 
+ void reset() { + m_batch->Clear(); + m_read_opts = rocksdb::ReadOptions(); + m_ddl_transaction = false; + } + + private: + bool prepare(const rocksdb::TransactionName &name) override { return true; } + + bool commit_no_binlog() override { + bool res = false; + rocksdb::Status s; + rocksdb::TransactionDBWriteOptimizations optimize; + optimize.skip_concurrency_control = true; + + s = merge_auto_incr_map(m_batch->GetWriteBatch()); + if (!s.ok()) { + rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); + res = true; + goto error; + } + + release_snapshot(); + + s = rdb->Write(write_opts, optimize, m_batch->GetWriteBatch()); + if (!s.ok()) { + rdb_handle_io_error(s, RDB_IO_ERROR_TX_COMMIT); + res = true; + goto error; + } + on_commit(); + error: + on_rollback(); + reset(); + + m_write_count = 0; + m_insert_count = 0; + m_update_count = 0; + m_delete_count = 0; + set_tx_read_only(false); + m_rollback_only = false; + return res; + } + + /* Implementations of do_*savepoint based on rocksdB::WriteBatch savepoints */ + void do_set_savepoint() override { m_batch->SetSavePoint(); } + + void do_rollback_to_savepoint() override { m_batch->RollbackToSavePoint(); } + + + public: + bool is_writebatch_trx() const override { return true; } + + void set_lock_timeout(int timeout_sec_arg) override { + // Nothing to do here. + } + + void set_sync(bool sync) override { write_opts.sync = sync; } + + void release_lock(rocksdb::ColumnFamilyHandle *const column_family, + const std::string &rowkey) override { + // Nothing to do here since we don't hold any row locks. 
+ } + + void rollback() override { + on_rollback(); + m_write_count = 0; + m_insert_count = 0; + m_update_count = 0; + m_delete_count = 0; + m_lock_count = 0; + release_snapshot(); + + reset(); + set_tx_read_only(false); + m_rollback_only = false; + } + + void acquire_snapshot(bool acquire_now) override { + if (m_read_opts.snapshot == nullptr) snapshot_created(rdb->GetSnapshot()); + } + + void release_snapshot() override { + if (m_read_opts.snapshot != nullptr) { + rdb->ReleaseSnapshot(m_read_opts.snapshot); + m_read_opts.snapshot = nullptr; + } + } + + rocksdb::Status put(rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, const rocksdb::Slice &value, + const bool assume_tracked) override { + ++m_write_count; + m_batch->Put(column_family, key, value); + // Note Put/Delete in write batch doesn't return any error code. We simply + // return OK here. + return rocksdb::Status::OK(); + } + + rocksdb::Status delete_key(rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, + const bool assume_tracked) override { + ++m_write_count; + m_batch->Delete(column_family, key); + return rocksdb::Status::OK(); + } + + rocksdb::Status single_delete( + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, const bool /* assume_tracked */) override { + ++m_write_count; + m_batch->SingleDelete(column_family, key); + return rocksdb::Status::OK(); + } + + bool has_modifications() const override { + return m_batch->GetWriteBatch()->Count() > 0; + } + + rocksdb::WriteBatchBase *get_write_batch() override { return m_batch; } + + rocksdb::WriteBatchBase *get_indexed_write_batch() override { + ++m_write_count; + return m_batch; + } + + rocksdb::Status get(rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, + rocksdb::PinnableSlice *const value) const override { + value->Reset(); + return m_batch->GetFromBatchAndDB(rdb, m_read_opts, column_family, key, + value); + } + + rocksdb::Status 
get_for_update( + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, rocksdb::PinnableSlice *const value, + bool /* exclusive */, const bool /* do_validate */) override { + if (value == nullptr) { + rocksdb::PinnableSlice pin_val; + rocksdb::Status s = get(column_family, key, &pin_val); + pin_val.Reset(); + return s; + } + + return get(column_family, key, value); + } + + rocksdb::Iterator *get_iterator( + const rocksdb::ReadOptions &options, + rocksdb::ColumnFamilyHandle *const /* column_family */) override { + const auto it = rdb->NewIterator(options); + return m_batch->NewIteratorWithBase(it); + } + + bool is_tx_started() const override { return (m_batch != nullptr); } + + void start_tx() override { + commit_ordered_done= false; // Do we need this here? + reset(); + write_opts.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC); + write_opts.disableWAL = THDVAR(m_thd, write_disable_wal); + write_opts.ignore_missing_column_families = + THDVAR(m_thd, write_ignore_missing_column_families); + + set_initial_savepoint(); + } + + void start_stmt() override {} + + void rollback_stmt() override { + if (m_batch) rollback_to_stmt_savepoint(); + } + + explicit Rdb_writebatch_impl(THD *const thd) + : Rdb_transaction(thd), m_batch(nullptr) { + m_batch = new rocksdb::WriteBatchWithIndex(rocksdb::BytewiseComparator(), 0, + true); + } + + virtual ~Rdb_writebatch_impl() override { + rollback(); + delete m_batch; + } +}; + +void Rdb_snapshot_notifier::SnapshotCreated( + const rocksdb::Snapshot *const snapshot) { + if (m_owning_tx != nullptr) { + m_owning_tx->snapshot_created(snapshot); + } +} + +std::multiset Rdb_transaction::s_tx_list; +mysql_mutex_t Rdb_transaction::s_tx_list_mutex; + +static Rdb_transaction *get_tx_from_thd(THD *const thd) { + return reinterpret_cast( + my_core::thd_get_ha_data(thd, rocksdb_hton)); +} + +namespace { + +class Rdb_perf_context_guard { + Rdb_io_perf m_io_perf; + Rdb_io_perf *m_io_perf_ptr; + Rdb_transaction *m_tx; 
+ uint m_level; + + public: + Rdb_perf_context_guard(const Rdb_perf_context_guard &) = delete; + Rdb_perf_context_guard &operator=(const Rdb_perf_context_guard &) = delete; + + explicit Rdb_perf_context_guard(Rdb_io_perf *io_perf, uint level) + : m_io_perf_ptr(io_perf), m_tx(nullptr), m_level(level) { + m_io_perf_ptr->start(m_level); + } + + explicit Rdb_perf_context_guard(Rdb_transaction *tx, uint level) + : m_io_perf_ptr(nullptr), m_tx(tx), m_level(level) { + /* + if perf_context information is already being recorded, this becomes a + no-op + */ + if (tx != nullptr) { + tx->io_perf_start(&m_io_perf); + } + } + + ~Rdb_perf_context_guard() { + if (m_tx != nullptr) { + m_tx->io_perf_end_and_record(); + } else if (m_io_perf_ptr != nullptr) { + m_io_perf_ptr->end_and_record(m_level); + } + } +}; + +} // anonymous namespace + +/* + TODO: maybe, call this in external_lock() and store in ha_rocksdb.. +*/ + +static Rdb_transaction *get_or_create_tx(THD *const thd) { + Rdb_transaction *tx = get_tx_from_thd(thd); + // TODO: this is called too many times.. O(#rows) + if (tx == nullptr) { + bool rpl_skip_tx_api= false; // MARIAROCKS_NOT_YET. 
+ if ((rpl_skip_tx_api && thd->rgi_slave) || + (THDVAR(thd, master_skip_tx_api) && !thd->rgi_slave)) + { + tx = new Rdb_writebatch_impl(thd); + } else { + tx = new Rdb_transaction_impl(thd); + } + tx->set_params(THDVAR(thd, lock_wait_timeout), THDVAR(thd, max_row_locks)); + tx->start_tx(); + my_core::thd_set_ha_data(thd, rocksdb_hton, tx); + } else { + tx->set_params(THDVAR(thd, lock_wait_timeout), THDVAR(thd, max_row_locks)); + if (!tx->is_tx_started()) { + tx->start_tx(); + } + } + + return tx; +} + +static int rocksdb_close_connection(handlerton *const hton, THD *const thd) { + Rdb_transaction *tx = get_tx_from_thd(thd); + if (tx != nullptr) { + bool is_critical_error; + int rc = tx->finish_bulk_load(&is_critical_error, false); + if (rc != 0 && is_critical_error) { + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: Error %d finalizing last SST file while " + "disconnecting", + rc); + } + if (tx->is_prepared()) + tx->detach_prepared_tx(); + delete tx; + } + return HA_EXIT_SUCCESS; +} + +/* + * Serializes an xid to a string so that it can + * be used as a rocksdb transaction name + */ +static std::string rdb_xid_to_string(const XID &src) { + DBUG_ASSERT(src.gtrid_length >= 0 && src.gtrid_length <= MAXGTRIDSIZE); + DBUG_ASSERT(src.bqual_length >= 0 && src.bqual_length <= MAXBQUALSIZE); + + std::string buf; + buf.reserve(RDB_XIDHDR_LEN + src.gtrid_length + src.bqual_length); + + /* + * expand formatID to fill 8 bytes if it doesn't already + * then reinterpret bit pattern as unsigned and store in network order + */ + uchar fidbuf[RDB_FORMATID_SZ]; + int64 signed_fid8 = src.formatID; + const uint64 raw_fid8 = *reinterpret_cast(&signed_fid8); + rdb_netbuf_store_uint64(fidbuf, raw_fid8); + buf.append(reinterpret_cast(fidbuf), RDB_FORMATID_SZ); + + buf.push_back(src.gtrid_length); + buf.push_back(src.bqual_length); + buf.append(src.data, (src.gtrid_length) + (src.bqual_length)); + return buf; +} + +#if 0 +// MARIAROCKS: MariaDB doesn't have flush_wal method +/** + Called 
by hton->flush_logs after MySQL group commit prepares a set of + transactions. +*/ +static bool rocksdb_flush_wal(handlerton* hton __attribute__((__unused__))) + DBUG_ASSERT(rdb != nullptr); + + rocksdb::Status s; + /* + target_lsn is set to 0 when MySQL wants to sync the wal files + */ + if ((target_lsn == 0 && !rocksdb_db_options->allow_mmap_writes) || + rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) { + rocksdb_wal_group_syncs++; + s = rdb->FlushWAL(target_lsn == 0 || + rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC); + } + + if (!s.ok()) { + rdb_log_status_error(s); + return HA_EXIT_FAILURE; + } + return HA_EXIT_SUCCESS; +} +#endif + +/** + For a slave, prepare() updates the slave_gtid_info table which tracks the + replication progress. +*/ +static int rocksdb_prepare(handlerton* hton, THD* thd, bool prepare_tx) +{ + bool async=false; // This is "ASYNC_COMMIT" feature which is only present in webscalesql + + Rdb_transaction *tx = get_tx_from_thd(thd); + if (!tx->can_prepare()) { + return HA_EXIT_FAILURE; + } + if (prepare_tx || + (!my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { + /* We were instructed to prepare the whole transaction, or + this is an SQL statement end and autocommit is on */ + +#ifdef MARIAROCKS_NOT_YET + /* + Storing binlog position inside MyRocks is needed only for restoring + MyRocks from backups. This feature is not supported yet. + */ + std::vector slave_gtid_info; + my_core::thd_slave_gtid_info(thd, &slave_gtid_info); + for (const auto &it : slave_gtid_info) { + rocksdb::WriteBatchBase *const write_batch = tx->get_blind_write_batch(); + binlog_manager.update_slave_gtid_info(it.id, it.db, it.gtid, write_batch); + } +#endif + + if (tx->is_two_phase()) { + + /* + MariaDB: the following branch is never taken. + We always flush at Prepare and rely on RocksDB's internal Group Commit + to do some grouping. 
+ */ + if (thd->durability_property == HA_IGNORE_DURABILITY || async) { + tx->set_sync(false); + } + + /* + MariaDB: do not flush logs if we are running in a non-crash-safe mode. + */ + if (!rocksdb_flush_log_at_trx_commit) + tx->set_sync(false); + + XID xid; + thd_get_xid(thd, reinterpret_cast(&xid)); + if (!tx->prepare(rdb_xid_to_string(xid))) { + return HA_EXIT_FAILURE; + } + + /* + MariaDB: our Group Commit implementation does not use the + hton->flush_logs call (at least currently) so the following is not + needed (TODO: will we need this for binlog rotation?) + */ +#ifdef MARIAROCKS_NOT_YET + if (thd->durability_property == HA_IGNORE_DURABILITY ) + (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER)) + && + THDVAR(thd, flush_log_at_trx_commit)) +#endif +#ifdef MARIAROCKS_NOT_YET + { + // MariaRocks: disable the + // "write/sync redo log before flushing binlog cache to file" + // feature. See a869c56d361bb44f46c0efeb11a8f03561676247 + /** + we set the log sequence as '1' just to trigger hton->flush_logs + */ + thd_store_lsn(thd, 1, DB_TYPE_ROCKSDB); + } +#endif + } + + DEBUG_SYNC(thd, "rocksdb.prepared"); + } else { + tx->make_stmt_savepoint_permanent(); + } + return HA_EXIT_SUCCESS; +} + +/** + do nothing for prepare/commit by xid + this is needed to avoid crashes in XA scenarios +*/ +static int rocksdb_commit_by_xid(handlerton *const hton, XID *const xid) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(hton != nullptr); + DBUG_ASSERT(xid != nullptr); + DBUG_ASSERT(commit_latency_stats != nullptr); + + rocksdb::StopWatchNano timer(rocksdb::Env::Default(), true); + + const auto name = rdb_xid_to_string(*xid); + DBUG_ASSERT(!name.empty()); + + rocksdb::Transaction *const trx = rdb->GetTransactionByName(name); + + if (trx == nullptr) { + DBUG_RETURN(HA_EXIT_FAILURE); + } + + const rocksdb::Status s = trx->Commit(); + + if (!s.ok()) { + rdb_log_status_error(s); + DBUG_RETURN(HA_EXIT_FAILURE); + } + + delete trx; + + // `Add()` is implemented in a thread-safe manner. 
+ commit_latency_stats->Add(timer.ElapsedNanos() / 1000); + + DBUG_RETURN(HA_EXIT_SUCCESS); +} + +static int rocksdb_rollback_by_xid( + handlerton *const hton MY_ATTRIBUTE((__unused__)), XID *const xid) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(hton != nullptr); + DBUG_ASSERT(xid != nullptr); + DBUG_ASSERT(rdb != nullptr); + + const auto name = rdb_xid_to_string(*xid); + + rocksdb::Transaction *const trx = rdb->GetTransactionByName(name); + + if (trx == nullptr) { + DBUG_RETURN(HA_EXIT_FAILURE); + } + + const rocksdb::Status s = trx->Rollback(); + + if (!s.ok()) { + rdb_log_status_error(s); + DBUG_RETURN(HA_EXIT_FAILURE); + } + + delete trx; + + DBUG_RETURN(HA_EXIT_SUCCESS); +} + +/** + Rebuilds an XID from a serialized version stored in a string. +*/ +static void rdb_xid_from_string(const std::string &src, XID *const dst) { + DBUG_ASSERT(dst != nullptr); + uint offset = 0; + uint64 raw_fid8 = + rdb_netbuf_to_uint64(reinterpret_cast(src.data())); + const int64 signed_fid8 = *reinterpret_cast(&raw_fid8); + dst->formatID = signed_fid8; + offset += RDB_FORMATID_SZ; + dst->gtrid_length = src.at(offset); + offset += RDB_GTRID_SZ; + dst->bqual_length = src.at(offset); + offset += RDB_BQUAL_SZ; + + DBUG_ASSERT(dst->gtrid_length >= 0 && dst->gtrid_length <= MAXGTRIDSIZE); + DBUG_ASSERT(dst->bqual_length >= 0 && dst->bqual_length <= MAXBQUALSIZE); + + memset(dst->data, 0, XIDDATASIZE); + src.copy(dst->data, (dst->gtrid_length) + (dst->bqual_length), + RDB_XIDHDR_LEN); +} + +/** + Reading last committed binary log info from RocksDB system row. + The info is needed for crash safe slave/master to work. 
+*/ +static int rocksdb_recover(handlerton* hton, XID* xid_list, uint len) +#ifdef MARIAROCKS_NOT_YET + char* const binlog_file, + my_off_t *const binlog_pos, + Gtid *const binlog_max_gtid) { +#endif +{ +#ifdef MARIAROCKS_NOT_YET + if (binlog_file && binlog_pos) { + char file_buf[FN_REFLEN + 1] = {0}; + my_off_t pos; + char gtid_buf[FN_REFLEN + 1] = {0}; + if (binlog_manager.read(file_buf, &pos, gtid_buf)) { + if (is_binlog_advanced(binlog_file, *binlog_pos, file_buf, pos)) { + memcpy(binlog_file, file_buf, FN_REFLEN + 1); + *binlog_pos = pos; + // NO_LINT_DEBUG + fprintf(stderr, + "RocksDB: Last binlog file position %llu," + " file name %s\n", + pos, file_buf); + if (*gtid_buf) { + global_sid_lock->rdlock(); + binlog_max_gtid->parse(global_sid_map, gtid_buf); + global_sid_lock->unlock(); + // NO_LINT_DEBUG + fprintf(stderr, "RocksDB: Last MySQL Gtid %s\n", gtid_buf); + } + } + } + } +#endif + + if (len == 0 || xid_list == nullptr) { + return HA_EXIT_SUCCESS; + } + + std::vector trans_list; + rdb->GetAllPreparedTransactions(&trans_list); + + uint count = 0; + for (auto &trans : trans_list) { + if (count >= len) { + break; + } + auto name = trans->GetName(); + rdb_xid_from_string(name, &xid_list[count]); + count++; + } + return count; +} + + +/* + Handle a commit checkpoint request from server layer. + + InnoDB does this: + We put the request in a queue, so that we can notify upper layer about + checkpoint complete when we have flushed the redo log. + If we have already flushed all relevant redo log, we notify immediately. + + MariaRocks just flushes everything right away ATM +*/ + +static void rocksdb_checkpoint_request(void *cookie) +{ + const rocksdb::Status s= rdb->FlushWAL(true); + //TODO: what to do on error? 
+ if (s.ok()) + { + rocksdb_wal_group_syncs++; + commit_checkpoint_notify_ha(cookie); + } +} + +/* + @param all: TRUE - commit the transaction + FALSE - SQL statement ended +*/ +static void rocksdb_commit_ordered(handlerton *hton, THD* thd, bool all) +{ + // Same assert as InnoDB has + DBUG_ASSERT(all || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | + OPTION_BEGIN))); + Rdb_transaction *tx = get_tx_from_thd(thd); + if (!tx->is_two_phase()) { + /* + ordered_commit is supposedly slower as it is done sequentially + in order to preserve commit order. + + if we are not required do 2-phase commit with the binlog, do not do + anything here. + */ + return; + } + + tx->set_sync(false); + + /* This will note the master position also */ + tx->commit_ordered_res= tx->commit(); + tx->commit_ordered_done= true; + +} + + +static int rocksdb_commit(handlerton* hton, THD* thd, bool commit_tx) +{ + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(hton != nullptr); + DBUG_ASSERT(thd != nullptr); + DBUG_ASSERT(commit_latency_stats != nullptr); + + rocksdb::StopWatchNano timer(rocksdb::Env::Default(), true); + + /* note: h->external_lock(F_UNLCK) is called after this function is called) */ + Rdb_transaction *tx = get_tx_from_thd(thd); + + /* this will trigger saving of perf_context information */ + Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd)); + + if (tx != nullptr) { + if (commit_tx || (!my_core::thd_test_options( + thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { + /* + This will not add anything to commit_latency_stats, and this is correct + right? + */ + if (tx->commit_ordered_done) + { + thd_wakeup_subsequent_commits(thd, 0); + DBUG_RETURN((tx->commit_ordered_res? 
HA_ERR_INTERNAL_ERROR: 0)); + } + + /* + We get here + - For a COMMIT statement that finishes a multi-statement transaction + - For a statement that has its own transaction + */ + if (thd->slave_thread) + { + // An attempt to make parallel slave performant (not fully successful, + // see MDEV-15372): + + // First, commit without syncing. This establishes the commit order + tx->set_sync(false); + bool tx_had_writes = tx->get_write_count()? true : false ; + if (tx->commit()) { + DBUG_RETURN(HA_ERR_ROCKSDB_COMMIT_FAILED); + } + thd_wakeup_subsequent_commits(thd, 0); + + if (tx_had_writes && rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC) + { + rocksdb::Status s= rdb->FlushWAL(true); + if (!s.ok()) + DBUG_RETURN(HA_ERR_INTERNAL_ERROR); + } + } + else + { + /* Not a slave thread */ + if (tx->commit()) { + DBUG_RETURN(HA_ERR_ROCKSDB_COMMIT_FAILED); + } + } + } else { + /* + We get here when committing a statement within a transaction. + */ + tx->make_stmt_savepoint_permanent(); + } + + if (my_core::thd_tx_isolation(thd) <= ISO_READ_COMMITTED) { + // For READ_COMMITTED, we release any existing snapshot so that we will + // see any changes that occurred since the last statement. + tx->release_snapshot(); + } + } + + // `Add()` is implemented in a thread-safe manner. + commit_latency_stats->Add(timer.ElapsedNanos() / 1000); + + DBUG_RETURN(HA_EXIT_SUCCESS); +} + + +static int rocksdb_rollback(handlerton *const hton, THD *const thd, + bool rollback_tx) { + Rdb_transaction *tx = get_tx_from_thd(thd); + Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd)); + + if (tx != nullptr) { + if (rollback_tx) { + /* + We get here, when + - ROLLBACK statement is issued. 
+ + Discard the changes made by the transaction + */ + tx->rollback(); + } else { + /* + We get here when + - a statement with AUTOCOMMIT=1 is being rolled back (because of some + error) + - a statement inside a transaction is rolled back + */ + + tx->rollback_stmt(); + tx->set_tx_failed(true); + } + + if (my_core::thd_tx_isolation(thd) <= ISO_READ_COMMITTED) { + // For READ_COMMITTED, we release any existing snapshot so that we will + // see any changes that occurred since the last statement. + tx->release_snapshot(); + } + } + return HA_EXIT_SUCCESS; +} + +static bool print_stats(THD *const thd, std::string const &type, + std::string const &name, std::string const &status, + stat_print_fn *stat_print) { + return stat_print(thd, type.c_str(), type.size(), name.c_str(), name.size(), + status.c_str(), status.size()); +} + +static std::string format_string(const char *const format, ...) { + std::string res; + va_list args; + va_list args_copy; + char static_buff[256]; + + DBUG_ASSERT(format != nullptr); + + va_start(args, format); + va_copy(args_copy, args); + + // Calculate how much space we will need + int len = vsnprintf(nullptr, 0, format, args); + va_end(args); + + if (len < 0) { + res = std::string(""); + } else if (len == 0) { + // Shortcut for an empty string + res = std::string(""); + } else { + // For short enough output use a static buffer + char *buff = static_buff; + std::unique_ptr dynamic_buff = nullptr; + + len++; // Add one for null terminator + + // for longer output use an allocated buffer + if (static_cast(len) > sizeof(static_buff)) { + dynamic_buff.reset(new char[len]); + buff = dynamic_buff.get(); + } + + // Now re-do the vsnprintf with the buffer which is now large enough + (void)vsnprintf(buff, len, format, args_copy); + + // Convert to a std::string. Note we could have created a std::string + // large enough and then converted the buffer to a 'char*' and created + // the output in place. This would probably work but feels like a hack. 
+ // Since this isn't code that needs to be super-performant we are going + // with this 'safer' method. + res = std::string(buff); + } + + va_end(args_copy); + + return res; +} + +class Rdb_snapshot_status : public Rdb_tx_list_walker { + private: + std::string m_data; + + static std::string current_timestamp(void) { + static const char *const format = "%d-%02d-%02d %02d:%02d:%02d"; + time_t currtime; + struct tm currtm; + + time(&currtime); + + localtime_r(&currtime, &currtm); + + return format_string(format, currtm.tm_year + 1900, currtm.tm_mon + 1, + currtm.tm_mday, currtm.tm_hour, currtm.tm_min, + currtm.tm_sec); + } + + static std::string get_header(void) { + return "\n============================================================\n" + + current_timestamp() + + " ROCKSDB TRANSACTION MONITOR OUTPUT\n" + "============================================================\n" + "---------\n" + "SNAPSHOTS\n" + "---------\n" + "LIST OF SNAPSHOTS FOR EACH SESSION:\n"; + } + + static std::string get_footer(void) { + return "-----------------------------------------\n" + "END OF ROCKSDB TRANSACTION MONITOR OUTPUT\n" + "=========================================\n"; + } + + static Rdb_deadlock_info::Rdb_dl_trx_info get_dl_txn_info( + const rocksdb::DeadlockInfo &txn, const GL_INDEX_ID &gl_index_id) { + Rdb_deadlock_info::Rdb_dl_trx_info txn_data; + + txn_data.trx_id = txn.m_txn_id; + + txn_data.table_name = ddl_manager.safe_get_table_name(gl_index_id); + if (txn_data.table_name.empty()) { + txn_data.table_name = + "NOT FOUND; INDEX_ID: " + std::to_string(gl_index_id.index_id); + } + + auto kd = ddl_manager.safe_find(gl_index_id); + txn_data.index_name = + (kd) ? 
kd->get_name() + : "NOT FOUND; INDEX_ID: " + std::to_string(gl_index_id.index_id); + + /* cf_manager.get_cf() may return nullptr (rocksdb_show_status checks for it), so do not dereference the handle blindly; report the raw CF id instead. */ + rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(txn.m_cf_id); + txn_data.cf_name = + (cfh != nullptr) ? cfh->GetName() + : "NOT FOUND; CF_ID: " + std::to_string(txn.m_cf_id); + + txn_data.waiting_key = + rdb_hexdump(txn.m_waiting_key.c_str(), txn.m_waiting_key.length()); + + txn_data.exclusive_lock = txn.m_exclusive; + + return txn_data; + } + + /* Converts one RocksDB deadlock path into an Rdb_deadlock_info: one entry per transaction on the path, plus victim id and deadlock time when the path is non-empty and the depth limit was not exceeded. */ + static Rdb_deadlock_info get_dl_path_trx_info( + const rocksdb::DeadlockPath &path_entry) { + Rdb_deadlock_info deadlock_info; + + for (auto it = path_entry.path.begin(); it != path_entry.path.end(); it++) { + const auto &txn = *it; + /* The index id is decoded from the leading bytes of the waiting key. NOTE(review): assumes the key holds at least 4 bytes - confirm. */ + const GL_INDEX_ID gl_index_id = { + txn.m_cf_id, rdb_netbuf_to_uint32(reinterpret_cast<const uchar *>( + txn.m_waiting_key.c_str()))}; + deadlock_info.path.push_back(get_dl_txn_info(txn, gl_index_id)); + } + DBUG_ASSERT_IFF(path_entry.limit_exceeded, path_entry.path.empty()); + /* print the first txn in the path to display the full deadlock cycle */ + if (!path_entry.path.empty() && !path_entry.limit_exceeded) { + const auto &deadlocking_txn = *(path_entry.path.end() - 1); + deadlock_info.victim_trx_id = deadlocking_txn.m_txn_id; + deadlock_info.deadlock_time = path_entry.deadlock_time; + } + return deadlock_info; + } + + public: + Rdb_snapshot_status() : m_data(get_header()) {} + + std::string getResult() { return m_data + get_footer(); } + + /* Implement Rdb_transaction interface */ + /* Create one row in the snapshot status table */ + void process_tran(const Rdb_transaction *const tx) override { + DBUG_ASSERT(tx != nullptr); + + /* Calculate the duration the snapshot has existed */ + int64_t snapshot_timestamp = tx->m_snapshot_timestamp; + if (snapshot_timestamp != 0) { + int64_t curr_time; + rdb->GetEnv()->GetCurrentTime(&curr_time); + + /* Zero-initialised: the call that fills it is compiled out unless MARIAROCKS_NOT_YET is defined, and the buffer is printed with %s below. */ + char buffer[1024] = {'\0'}; +#ifdef MARIAROCKS_NOT_YET + thd_security_context(tx->get_thd(), buffer, sizeof buffer, 0); +#endif + m_data += format_string( + "---SNAPSHOT, ACTIVE %lld sec\n" + "%s\n" + "lock count %llu, write count %llu\n" + "insert count %llu, 
update count %llu, delete count %llu\n", + (longlong)(curr_time - snapshot_timestamp), buffer, tx->get_lock_count(), + tx->get_write_count(), tx->get_insert_count(), tx->get_update_count(), + tx->get_delete_count()); + } + } + + void populate_deadlock_buffer() { + auto dlock_buffer = rdb->GetDeadlockInfoBuffer(); + m_data += "----------LATEST DETECTED DEADLOCKS----------\n"; + + for (const auto &path_entry : dlock_buffer) { + std::string path_data; + if (path_entry.limit_exceeded) { + path_data += "\n-------DEADLOCK EXCEEDED MAX DEPTH-------\n"; + } else { + path_data += + "\n*** DEADLOCK PATH\n" + "=========================================\n"; + const auto dl_info = get_dl_path_trx_info(path_entry); + const auto deadlock_time = dl_info.deadlock_time; + for (auto it = dl_info.path.begin(); it != dl_info.path.end(); it++) { + const auto &trx_info = *it; + path_data += format_string( + "TIMESTAMP: %" PRId64 + "\n" + "TRANSACTION ID: %u\n" + "COLUMN FAMILY NAME: %s\n" + "WAITING KEY: %s\n" + "LOCK TYPE: %s\n" + "INDEX NAME: %s\n" + "TABLE NAME: %s\n", + deadlock_time, trx_info.trx_id, trx_info.cf_name.c_str(), + trx_info.waiting_key.c_str(), + trx_info.exclusive_lock ? 
"EXCLUSIVE" : "SHARED", + trx_info.index_name.c_str(), trx_info.table_name.c_str()); + if (it != dl_info.path.end() - 1) { + path_data += "---------------WAITING FOR---------------\n"; + } + } + path_data += format_string( + "\n--------TRANSACTION ID: %u GOT DEADLOCK---------\n", + dl_info.victim_trx_id); + } + m_data += path_data; + } + } + + std::vector get_deadlock_info() { + std::vector deadlock_info; + auto dlock_buffer = rdb->GetDeadlockInfoBuffer(); + for (const auto &path_entry : dlock_buffer) { + if (!path_entry.limit_exceeded) { + deadlock_info.push_back(get_dl_path_trx_info(path_entry)); + } + } + return deadlock_info; + } +}; + +/** + * @brief + * walks through all non-replication transactions and copies + * out relevant information for information_schema.rocksdb_trx + */ +class Rdb_trx_info_aggregator : public Rdb_tx_list_walker { + private: + std::vector *m_trx_info; + + public: + explicit Rdb_trx_info_aggregator(std::vector *const trx_info) + : m_trx_info(trx_info) {} + + void process_tran(const Rdb_transaction *const tx) override { + static const std::map state_map = { + {rocksdb::Transaction::STARTED, "STARTED"}, + {rocksdb::Transaction::AWAITING_PREPARE, "AWAITING_PREPARE"}, + {rocksdb::Transaction::PREPARED, "PREPARED"}, + {rocksdb::Transaction::AWAITING_COMMIT, "AWAITING_COMMIT"}, + {rocksdb::Transaction::COMMITED, "COMMITED"}, + {rocksdb::Transaction::AWAITING_ROLLBACK, "AWAITING_ROLLBACK"}, + {rocksdb::Transaction::ROLLEDBACK, "ROLLEDBACK"}, + }; + + DBUG_ASSERT(tx != nullptr); + + THD *const thd = tx->get_thd(); + ulong thread_id = thd_get_thread_id(thd); + + if (tx->is_writebatch_trx()) { + const auto wb_impl = static_cast(tx); + DBUG_ASSERT(wb_impl); + m_trx_info->push_back( + {"", /* name */ + 0, /* trx_id */ + wb_impl->get_write_count(), 0, /* lock_count */ + 0, /* timeout_sec */ + "", /* state */ + "", /* waiting_key */ + 0, /* waiting_cf_id */ + 1, /*is_replication */ + 1, /* skip_trx_api */ + wb_impl->is_tx_read_only(), 0, /* deadlock 
detection */ + wb_impl->num_ongoing_bulk_load(), thread_id, "" /* query string */}); + } else { + const auto tx_impl = static_cast<const Rdb_transaction_impl *>(tx); + DBUG_ASSERT(tx_impl); + const rocksdb::Transaction *rdb_trx = tx_impl->get_rdb_trx(); + + /* No underlying RocksDB transaction: nothing to report for this entry. */ + if (rdb_trx == nullptr) { + return; + } + + char query_buf[NAME_LEN+1]; + thd_query_safe(thd, query_buf, sizeof(query_buf)); + std::string query_str(query_buf); + + const auto state_it = state_map.find(rdb_trx->GetState()); + DBUG_ASSERT(state_it != state_map.end()); + const int is_replication = (thd->rgi_slave != nullptr); + /* Initialised so that a defined value is reported even if GetWaitingTxns() does not write to it. */ + uint32_t waiting_cf_id = 0; + std::string waiting_key; + rdb_trx->GetWaitingTxns(&waiting_cf_id, &waiting_key); + + m_trx_info->push_back( + {rdb_trx->GetName(), rdb_trx->GetID(), tx_impl->get_write_count(), + tx_impl->get_lock_count(), tx_impl->get_timeout_sec(), + state_it->second, waiting_key, waiting_cf_id, is_replication, + 0, /* skip_trx_api */ + tx_impl->is_tx_read_only(), rdb_trx->IsDeadlockDetect(), + tx_impl->num_ongoing_bulk_load(), thread_id, query_str}); + } + } +}; + +/* + returns a vector of info for all non-replication threads + for use by information_schema.rocksdb_trx +*/ +std::vector<Rdb_trx_info> rdb_get_all_trx_info() { + std::vector<Rdb_trx_info> trx_info; + Rdb_trx_info_aggregator trx_info_agg(&trx_info); + Rdb_transaction::walk_tx_list(&trx_info_agg); + return trx_info; +} + + +/* + returns a vector of info of recent deadlocks + for use by information_schema.rocksdb_deadlock +*/ +std::vector<Rdb_deadlock_info> rdb_get_deadlock_info() { + Rdb_snapshot_status showStatus; + Rdb_transaction::walk_tx_list(&showStatus); + return showStatus.get_deadlock_info(); +} + +#ifdef MARIAROCKS_NOT_YET +/* Generate the snapshot status table */ +static bool rocksdb_show_snapshot_status(handlerton *const hton, THD *const thd, + stat_print_fn *const stat_print) { + Rdb_snapshot_status showStatus; + + Rdb_transaction::walk_tx_list(&showStatus); + showStatus.populate_deadlock_buffer(); + + /* Send the result data back to MySQL */ + return print_stats(thd, "rocksdb", 
"", showStatus.getResult(), stat_print); +} +#endif + +/* + This is called for SHOW ENGINE ROCKSDB STATUS | LOGS | etc. + + For now, produce info about live files (which gives an imprecise idea about + what column families are there). +*/ +static bool rocksdb_show_status(handlerton *const hton, THD *const thd, + stat_print_fn *const stat_print, + enum ha_stat_type stat_type) { + DBUG_ASSERT(hton != nullptr); + DBUG_ASSERT(thd != nullptr); + DBUG_ASSERT(stat_print != nullptr); + + bool res = false; + char buf[100] = {'\0'}; + + if (stat_type == HA_ENGINE_STATUS) { + DBUG_ASSERT(rdb != nullptr); + + std::string str; + + /* Global DB Statistics */ + if (rocksdb_stats) { + str = rocksdb_stats->ToString(); + + // Use the same format as internal RocksDB statistics entries to make + // sure that output will look unified. + DBUG_ASSERT(commit_latency_stats != nullptr); + + snprintf(buf, sizeof(buf), + "rocksdb.commit_latency statistics " + "Percentiles :=> 50 : %.2f 95 : %.2f " + "99 : %.2f 100 : %.2f\n", + commit_latency_stats->Percentile(50), + commit_latency_stats->Percentile(95), + commit_latency_stats->Percentile(99), + commit_latency_stats->Percentile(100)); + str.append(buf); + + uint64_t v = 0; + + // Retrieve additional stalling related numbers from RocksDB and append + // them to the buffer meant for displaying detailed statistics. The intent + // here is to avoid adding another row to the query output because of + // just two numbers. + // + // NB! We're replacing hyphens with underscores in output to better match + // the existing naming convention. 
+ if (rdb->GetIntProperty("rocksdb.is-write-stopped", &v)) { + snprintf(buf, sizeof(buf), "rocksdb.is_write_stopped COUNT : %llu\n", (ulonglong)v); + str.append(buf); + } + + /* Emit the value under its own name (hyphens replaced by underscores), in the same format as the entry above; previously the line carried no name. */ + if (rdb->GetIntProperty("rocksdb.actual-delayed-write-rate", &v)) { + snprintf(buf, sizeof(buf), + "rocksdb.actual_delayed_write_rate " + "COUNT : %llu\n", + (ulonglong)v); + str.append(buf); + } + + res |= print_stats(thd, "STATISTICS", "rocksdb", str, stat_print); + } + + /* Per DB stats */ + if (rdb->GetProperty("rocksdb.dbstats", &str)) { + res |= print_stats(thd, "DBSTATS", "rocksdb", str, stat_print); + } + + /* Per column family stats */ + for (const auto &cf_name : cf_manager.get_cf_names()) { + rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(cf_name); + if (cfh == nullptr) { + continue; + } + + if (!rdb->GetProperty(cfh, "rocksdb.cfstats", &str)) { + continue; + } + + res |= print_stats(thd, "CF_COMPACTION", cf_name, str, stat_print); + } + + /* Memory Statistics */ + std::vector<rocksdb::DB *> dbs; + std::unordered_set<const rocksdb::Cache *> cache_set; + size_t internal_cache_count = 0; + size_t kDefaultInternalCacheSize = 8 * 1024 * 1024; + + dbs.push_back(rdb); + cache_set.insert(rocksdb_tbl_options->block_cache.get()); + + /* Collect the block caches of every column family that uses a block-based table factory; factories without an explicit block cache are only counted. */ + for (const auto &cf_handle : cf_manager.get_all_cf()) { + rocksdb::ColumnFamilyDescriptor cf_desc; + cf_handle->GetDescriptor(&cf_desc); + auto *const table_factory = cf_desc.options.table_factory.get(); + + if (table_factory != nullptr) { + std::string tf_name = table_factory->Name(); + + if (tf_name.find("BlockBasedTable") != std::string::npos) { + const rocksdb::BlockBasedTableOptions *const bbt_opt = + reinterpret_cast<const rocksdb::BlockBasedTableOptions *>( + table_factory->GetOptions()); + + if (bbt_opt != nullptr) { + if (bbt_opt->block_cache.get() != nullptr) { + cache_set.insert(bbt_opt->block_cache.get()); + } else { + internal_cache_count++; + } + cache_set.insert(bbt_opt->block_cache_compressed.get()); + } + } + } + } + + std::map<rocksdb::MemoryUtil::UsageType, uint64_t> temp_usage_by_type; + str.clear(); + rocksdb::MemoryUtil::GetApproximateMemoryUsageByType(dbs, cache_set, + &temp_usage_by_type); 
+ snprintf(buf, sizeof(buf), "\nMemTable Total: %llu", + (ulonglong)temp_usage_by_type[rocksdb::MemoryUtil::kMemTableTotal]); + str.append(buf); + snprintf(buf, sizeof(buf), "\nMemTable Unflushed: %llu", + (ulonglong)temp_usage_by_type[rocksdb::MemoryUtil::kMemTableUnFlushed]); + str.append(buf); + snprintf(buf, sizeof(buf), "\nTable Readers Total: %llu", + (ulonglong)temp_usage_by_type[rocksdb::MemoryUtil::kTableReadersTotal]); + str.append(buf); + snprintf(buf, sizeof(buf), "\nCache Total: %llu", + (ulonglong)temp_usage_by_type[rocksdb::MemoryUtil::kCacheTotal]); + str.append(buf); + snprintf(buf, sizeof(buf), "\nDefault Cache Capacity: %llu", + (ulonglong)internal_cache_count * kDefaultInternalCacheSize); + str.append(buf); + res |= print_stats(thd, "MEMORY_STATS", "rocksdb", str, stat_print); + + /* Show the background thread status */ + std::vector thread_list; + rocksdb::Status s = rdb->GetEnv()->GetThreadList(&thread_list); + + if (!s.ok()) { + // NO_LINT_DEBUG + sql_print_error("RocksDB: Returned error (%s) from GetThreadList.\n", + s.ToString().c_str()); + res |= true; + } else { + /* For each background thread retrieved, print out its information */ + for (auto &it : thread_list) { + /* Only look at background threads. Ignore user threads, if any. 
*/ + if (it.thread_type > rocksdb::ThreadStatus::LOW_PRIORITY) { + continue; + } + + str = "\nthread_type: " + it.GetThreadTypeName(it.thread_type) + + "\ncf_name: " + it.cf_name + + "\noperation_type: " + it.GetOperationName(it.operation_type) + + "\noperation_stage: " + + it.GetOperationStageName(it.operation_stage) + + "\nelapsed_time_ms: " + it.MicrosToString(it.op_elapsed_micros); + + for (auto &it_props : it.InterpretOperationProperties( + it.operation_type, it.op_properties)) { + str += "\n" + it_props.first + ": " + std::to_string(it_props.second); + } + + str += "\nstate_type: " + it.GetStateName(it.state_type); + + res |= print_stats(thd, "BG_THREADS", std::to_string(it.thread_id), str, + stat_print); + } + } + + /* Discard the text left over from the MEMORY_STATS / BG_THREADS rows: when dump_snapshots() below is compiled out, the stale text would otherwise be re-emitted as an EXPLICIT_SNAPSHOTS row. */ + str.clear(); + +#ifdef MARIAROCKS_NOT_YET + /* Explicit snapshot information */ + str = Rdb_explicit_snapshot::dump_snapshots(); +#endif + + if (!str.empty()) { + res |= print_stats(thd, "EXPLICIT_SNAPSHOTS", "rocksdb", str, stat_print); + } +#ifdef MARIAROCKS_NOT_YET + } else if (stat_type == HA_ENGINE_TRX) { + /* Handle the SHOW ENGINE ROCKSDB TRANSACTION STATUS command */ + res |= rocksdb_show_snapshot_status(hton, thd, stat_print); +#endif + } + return res; +} + +/* Registers the engine with the server for the current statement and, when the session is inside a multi-statement transaction (OPTION_NOT_AUTOCOMMIT or OPTION_BEGIN set), also for the whole transaction. */ +static inline void rocksdb_register_tx(handlerton *const hton, THD *const thd, + Rdb_transaction *const tx) { + DBUG_ASSERT(tx != nullptr); + + trans_register_ha(thd, FALSE, rocksdb_hton, 0); + if (my_core::thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { + tx->start_stmt(); + trans_register_ha(thd, TRUE, rocksdb_hton, 0); + } +} + +static const char *ha_rocksdb_exts[] = {NullS}; + +#ifdef MARIAROCKS_NOT_YET +static bool rocksdb_explicit_snapshot( + handlerton *const /* hton */, /*!< in: RocksDB handlerton */ + THD *const thd, /*!< in: MySQL thread handle */ + snapshot_info_st *ss_info) /*!< out: Snapshot information */ +{ + switch (ss_info->op) { + case snapshot_operation::SNAPSHOT_CREATE: { + if (mysql_bin_log_is_open()) { + mysql_bin_log_lock_commits(ss_info); + } + auto s = 
Rdb_explicit_snapshot::create(ss_info, rdb, rdb->GetSnapshot()); + if (mysql_bin_log_is_open()) { + mysql_bin_log_unlock_commits(ss_info); + } + + thd->set_explicit_snapshot(s); + return s == nullptr; + } + case snapshot_operation::SNAPSHOT_ATTACH: { + auto s = Rdb_explicit_snapshot::get(ss_info->snapshot_id); + if (!s) { + return true; + } + *ss_info = s->ss_info; + thd->set_explicit_snapshot(s); + return false; + } + case snapshot_operation::SNAPSHOT_RELEASE: { + if (!thd->get_explicit_snapshot()) { + return true; + } + *ss_info = thd->get_explicit_snapshot()->ss_info; + thd->set_explicit_snapshot(nullptr); + return false; + } + default: + DBUG_ASSERT(false); + return true; + } + return true; +} +#endif + +/* + Supporting START TRANSACTION WITH CONSISTENT [ROCKSDB] SNAPSHOT + + Features: + 1. Supporting START TRANSACTION WITH CONSISTENT SNAPSHOT + 2. Getting current binlog position in addition to #1. + + The second feature is done by START TRANSACTION WITH + CONSISTENT ROCKSDB SNAPSHOT. This is Facebook's extension, and + it works like existing START TRANSACTION WITH CONSISTENT INNODB SNAPSHOT. + + - When not setting engine, START TRANSACTION WITH CONSISTENT SNAPSHOT + takes both InnoDB and RocksDB snapshots, and both InnoDB and RocksDB + participate in transaction. When executing COMMIT, both InnoDB and + RocksDB modifications are committed. Remember that XA is not supported yet, + so mixing engines is not recommended anyway. + + - When setting engine, START TRANSACTION WITH CONSISTENT.. takes + snapshot for the specified engine only. But it starts both + InnoDB and RocksDB transactions. 
+*/
+/*
+  Handlerton callback that starts a read-only RocksDB transaction for `thd`
+  and acquires its snapshot immediately.
+
+  Returns HA_EXIT_FAILURE (after raising
+  ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT) when the session isolation level
+  is not REPEATABLE READ; otherwise registers the transaction with the SQL
+  layer, takes the snapshot and returns HA_EXIT_SUCCESS.
+
+  The snapshot_info_st parameter and the binlog locking around it only exist
+  when MARIADB_NOT_YET is defined.
+*/
+static int rocksdb_start_tx_and_assign_read_view(
+    handlerton *const hton, /*!< in: RocksDB handlerton */
+    THD *const thd          /*!< in: MySQL thread handle of the
+                            user for whom the transaction should
+                            be committed */
+#ifdef MARIADB_NOT_YET
+    ,
+    snapshot_info_st *ss_info /*!< in/out: Snapshot info like binlog file, pos,
+                              gtid executed and snapshot ID */
+#endif
+) {
+  ulong const tx_isolation = my_core::thd_tx_isolation(thd);
+
+  if (tx_isolation != ISO_REPEATABLE_READ) {
+    my_error(ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT, MYF(0));
+    return HA_EXIT_FAILURE;
+  }
+
+#ifdef MARIADB_NOT_YET
+  if (ss_info) {
+    if (mysql_bin_log_is_open()) {
+      mysql_bin_log_lock_commits(ss_info);
+    } else {
+      return HA_EXIT_FAILURE;
+    }
+  }
+#endif
+
+  /*
+    MariaDB: there is no need to call mysql_bin_log_lock_commits and then
+    unlock back.
+    SQL layer calls start_consistent_snapshot() for all engines, including the
+    binlog under LOCK_commit_ordered mutex.
+
+    The mutex prevents binlog commits from happening (right?) while the storage
+    engine(s) allocate read snapshots. That way, each storage engine is
+    synchronized with current binlog position.
+  */
+  mysql_mutex_assert_owner(&LOCK_commit_ordered);
+
+  Rdb_transaction *const tx = get_or_create_tx(thd);
+  Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd));
+
+  // The transaction must not already hold a snapshot: the one acquired below
+  // is the consistent read view the caller asked for.
+  DBUG_ASSERT(!tx->has_snapshot());
+  tx->set_tx_read_only(true);
+  rocksdb_register_tx(hton, thd, tx);
+  tx->acquire_snapshot(true);
+
+#ifdef MARIADB_NOT_YET
+  if (ss_info) {
+    mysql_bin_log_unlock_commits(ss_info);
+  }
+#endif
+  return HA_EXIT_SUCCESS;
+}
+
+#ifdef MARIADB_NOT_YET
+/*
+  Disabled (MARIADB_NOT_YET) variant that either creates a new explicit
+  snapshot or attaches the transaction to an existing one, depending on
+  ss_info->op. Returns HA_EXIT_SUCCESS or HA_EXIT_FAILURE.
+*/
+static int rocksdb_start_tx_with_shared_read_view(
+    handlerton *const hton,    /*!< in: RocksDB handlerton */
+    THD *const thd,            /*!< in: MySQL thread handle of the
+                               user for whom the transaction should
+                               be committed */
+    snapshot_info_st *ss_info) /*!< out: Snapshot info like binlog file, pos,
+                               gtid executed and snapshot ID */
+{
+  DBUG_ASSERT(thd != nullptr);
+
+  int error = HA_EXIT_SUCCESS;
+
+  ulong const tx_isolation = my_core::thd_tx_isolation(thd);
+  if (tx_isolation != ISO_REPEATABLE_READ) {
+    my_error(ER_ISOLATION_LEVEL_WITH_CONSISTENT_SNAPSHOT, MYF(0));
+    return HA_EXIT_FAILURE;
+  }
+
+  Rdb_transaction *tx = nullptr;
+#ifdef MARIADB_NOT_YET
+  std::shared_ptr<Rdb_explicit_snapshot> explicit_snapshot;
+  const auto op = ss_info->op;
+
+  DBUG_ASSERT(op == snapshot_operation::SNAPSHOT_CREATE ||
+              op == snapshot_operation::SNAPSHOT_ATTACH);
+
+  // case: if binlogs are available get binlog file/pos and gtid info
+  if (op == snapshot_operation::SNAPSHOT_CREATE && mysql_bin_log_is_open()) {
+    mysql_bin_log_lock_commits(ss_info);
+  }
+
+  if (op == snapshot_operation::SNAPSHOT_ATTACH) {
+    explicit_snapshot = Rdb_explicit_snapshot::get(ss_info->snapshot_id);
+    if (!explicit_snapshot) {
+      my_printf_error(ER_UNKNOWN_ERROR, "Snapshot %llu does not exist", MYF(0),
+                      ss_info->snapshot_id);
+      error = HA_EXIT_FAILURE;
+    }
+  }
+#endif
+
+  // case: all good till now
+  if (error == HA_EXIT_SUCCESS) {
+    tx = get_or_create_tx(thd);
+    Rdb_perf_context_guard guard(tx, rocksdb_perf_context_level(thd));
+
+#ifdef MARIADB_NOT_YET
+    if (explicit_snapshot) {
+      tx->m_explicit_snapshot = explicit_snapshot;
+    }
+#endif
+
+    DBUG_ASSERT(!tx->has_snapshot());
+    tx->set_tx_read_only(true);
+    rocksdb_register_tx(hton, thd, tx);
+    tx->acquire_snapshot(true);
+
+#ifdef MARIADB_NOT_YET
+    // case: an explicit snapshot was not assigned to this transaction
+    if (!tx->m_explicit_snapshot) {
+      tx->m_explicit_snapshot =
+          Rdb_explicit_snapshot::create(ss_info, rdb, tx->m_read_opts.snapshot);
+      if (!tx->m_explicit_snapshot) {
+        my_printf_error(ER_UNKNOWN_ERROR, "Could not create snapshot", MYF(0));
+        error = HA_EXIT_FAILURE;
+      }
+    }
+#endif
+  }
+
+#ifdef MARIADB_NOT_YET
+  // case: unlock the binlog
+  if (op == snapshot_operation::SNAPSHOT_CREATE && mysql_bin_log_is_open()) {
+    mysql_bin_log_unlock_commits(ss_info);
+  }
+
+  DBUG_ASSERT(error == HA_EXIT_FAILURE || tx->m_explicit_snapshot);
+
+  // copy over the snapshot details to pass to the upper layers.
+  // tx is still nullptr when the ATTACH lookup above failed, so it has to be
+  // checked before it is dereferenced.
+  if (tx != nullptr && tx->m_explicit_snapshot) {
+    *ss_info = tx->m_explicit_snapshot->ss_info;
+    ss_info->op = op;
+  }
+#endif
+
+  return error;
+}
+#endif
+
+/* Dummy SAVEPOINT support. This is needed for long running transactions
+ * like mysqldump (https://bugs.mysql.com/bug.php?id=71017).
+ * Current SAVEPOINT does not correctly handle ROLLBACK and does not return
+ * errors. This needs to be addressed in future versions (Issue#96).
+ */
+static int rocksdb_savepoint(handlerton *const hton, THD *const thd,
+                             void *const savepoint) {
+  // Nothing is recorded when a savepoint is set; success is always reported.
+  return HA_EXIT_SUCCESS;
+}
+
+/*
+  Handlerton savepoint_rollback callback: forwards the request to the
+  connection's Rdb_transaction and returns its result.
+*/
+static int rocksdb_rollback_to_savepoint(handlerton *const hton, THD *const thd,
+                                         void *const savepoint) {
+  // NOTE(review): assumes get_tx_from_thd() never returns nullptr here, i.e.
+  // a transaction has already been created for this connection - confirm.
+  Rdb_transaction *tx = get_tx_from_thd(thd);
+  return tx->rollback_to_savepoint(savepoint);
+}
+
+/*
+  Handlerton savepoint_rollback_can_release_mdl callback: always answers true.
+*/
+static bool rocksdb_rollback_to_savepoint_can_release_mdl(
+    handlerton *const /* hton */, THD *const /* thd */) {
+  return true;
+}
+
+#ifdef MARIAROCKS_NOT_YET
+/*
+  This is called for INFORMATION_SCHEMA
+
+  For every open table, copies the per-table I/O and lock counters out of its
+  Rdb_table_handler and reports them through `cb`.
+*/
+static void rocksdb_update_table_stats(
+    /* per-table stats callback */
+    void (*cb)(const char *db, const char *tbl, bool is_partition,
+               my_io_perf_t *r, my_io_perf_t *w, my_io_perf_t *r_blob,
+               my_io_perf_t *r_primary, my_io_perf_t *r_secondary,
+               page_stats_t *page_stats, comp_stats_t *comp_stats,
+               int n_lock_wait, int n_lock_wait_timeout, int n_lock_deadlock,
+               const char *engine)) {
+  my_io_perf_t io_perf_read;
+  my_io_perf_t io_perf_write;
+  my_io_perf_t io_perf;
+  page_stats_t page_stats;
+  comp_stats_t comp_stats;
+  uint lock_wait_timeout_stats;
+  uint deadlock_stats;
+  uint lock_wait_stats;
+  std::vector<std::string> tablenames;
+
+  /*
+    Most of these are for innodb, so setting them to 0.
+    TODO: possibly separate out primary vs. secondary index reads
+  */
+  memset(&io_perf, 0, sizeof(io_perf));
+  memset(&page_stats, 0, sizeof(page_stats));
+  memset(&comp_stats, 0, sizeof(comp_stats));
+  memset(&io_perf_write, 0, sizeof(io_perf_write));
+
+  tablenames = rdb_open_tables.get_table_names();
+
+  for (const auto &it : tablenames) {
+    Rdb_table_handler *table_handler;
+    std::string str, dbname, tablename, partname;
+    char dbname_sys[NAME_LEN + 1];
+    char tablename_sys[NAME_LEN + 1];
+    bool is_partition;
+
+    if (rdb_normalize_tablename(it, &str) != HA_EXIT_SUCCESS) {
+      /* Function needs to return void because of the interface and we've
+       * detected an error which shouldn't happen. There's no way to let
+       * caller know that something failed.
+       */
+      SHIP_ASSERT(false);
+      return;
+    }
+
+    if (rdb_split_normalized_tablename(str, &dbname, &tablename, &partname)) {
+      continue;
+    }
+
+    is_partition = (partname.size() != 0);
+
+    table_handler = rdb_open_tables.get_table_handler(it.c_str());
+    if (table_handler == nullptr) {
+      continue;
+    }
+
+    io_perf_read.bytes = table_handler->m_io_perf_read.bytes.load();
+    io_perf_read.requests = table_handler->m_io_perf_read.requests.load();
+    io_perf_write.bytes = table_handler->m_io_perf_write.bytes.load();
+    io_perf_write.requests = table_handler->m_io_perf_write.requests.load();
+    lock_wait_timeout_stats = table_handler->m_lock_wait_timeout_counter.load();
+    deadlock_stats = table_handler->m_deadlock_counter.load();
+    lock_wait_stats =
+        table_handler->m_table_perf_context.m_value[PC_KEY_LOCK_WAIT_COUNT]
+            .load();
+
+    /*
+      Convert from rocksdb timer to mysql timer. RocksDB values are
+      in nanoseconds, but table statistics expect the value to be
+      in my_timer format.
+    */
+    io_perf_read.svc_time = my_core::microseconds_to_my_timer(
+        table_handler->m_io_perf_read.svc_time.load() / 1000);
+    io_perf_read.svc_time_max = my_core::microseconds_to_my_timer(
+        table_handler->m_io_perf_read.svc_time_max.load() / 1000);
+    io_perf_read.wait_time = my_core::microseconds_to_my_timer(
+        table_handler->m_io_perf_read.wait_time.load() / 1000);
+    io_perf_read.wait_time_max = my_core::microseconds_to_my_timer(
+        table_handler->m_io_perf_read.wait_time_max.load() / 1000);
+    io_perf_read.slow_ios = table_handler->m_io_perf_read.slow_ios.load();
+    rdb_open_tables.release_table_handler(table_handler);
+
+    /*
+      Table stats expects our database and table name to be in system encoding,
+      not filename format. Convert before calling callback.
+    */
+    my_core::filename_to_tablename(dbname.c_str(), dbname_sys,
+                                   sizeof(dbname_sys));
+    my_core::filename_to_tablename(tablename.c_str(), tablename_sys,
+                                   sizeof(tablename_sys));
+    (*cb)(dbname_sys, tablename_sys, is_partition, &io_perf_read,
+          &io_perf_write, &io_perf, &io_perf, &io_perf, &page_stats,
+          &comp_stats, lock_wait_stats, lock_wait_timeout_stats, deadlock_stats,
+          rocksdb_hton_name);
+  }
+}
+#endif
+/*
+  Compares the options about to be used for opening the database at `dbpath`
+  with the options RocksDB persisted for it.
+
+  Returns OK when no persisted options exist yet, NotSupported when the number
+  of column family descriptors differs, and otherwise the status produced by
+  LoadLatestOptions() / CheckOptionsCompatibility().
+*/
+static rocksdb::Status check_rocksdb_options_compatibility(
+    const char *const dbpath, const rocksdb::Options &main_opts,
+    const std::vector<rocksdb::ColumnFamilyDescriptor> &cf_descr) {
+  DBUG_ASSERT(rocksdb_datadir != nullptr);
+  // dbpath is the value that is actually handed to RocksDB below.
+  DBUG_ASSERT(dbpath != nullptr);
+
+  rocksdb::DBOptions loaded_db_opt;
+  std::vector<rocksdb::ColumnFamilyDescriptor> loaded_cf_descs;
+  rocksdb::Status status =
+      LoadLatestOptions(dbpath, rocksdb::Env::Default(), &loaded_db_opt,
+                        &loaded_cf_descs, rocksdb_ignore_unknown_options);
+
+  // If we're starting from scratch and there are no options saved yet then this
+  // is a valid case. Therefore we can't compare the current set of options to
+  // anything.
+  if (status.IsNotFound()) {
+    return rocksdb::Status::OK();
+  }
+
+  if (!status.ok()) {
+    return status;
+  }
+
+  if (loaded_cf_descs.size() != cf_descr.size()) {
+    return rocksdb::Status::NotSupported(
+        "Mismatched size of column family "
+        "descriptors.");
+  }
+
+  // Please see RocksDB documentation for more context about why we need to set
+  // user-defined functions and pointer-typed options manually.
+  for (size_t i = 0; i < loaded_cf_descs.size(); i++) {
+    loaded_cf_descs[i].options.compaction_filter =
+        cf_descr[i].options.compaction_filter;
+    loaded_cf_descs[i].options.compaction_filter_factory =
+        cf_descr[i].options.compaction_filter_factory;
+    loaded_cf_descs[i].options.comparator = cf_descr[i].options.comparator;
+    loaded_cf_descs[i].options.memtable_factory =
+        cf_descr[i].options.memtable_factory;
+    loaded_cf_descs[i].options.merge_operator =
+        cf_descr[i].options.merge_operator;
+    loaded_cf_descs[i].options.prefix_extractor =
+        cf_descr[i].options.prefix_extractor;
+    loaded_cf_descs[i].options.table_factory =
+        cf_descr[i].options.table_factory;
+  }
+
+  // This is the essence of the function - determine if it's safe to open the
+  // database or not.
+  status = CheckOptionsCompatibility(dbpath, rocksdb::Env::Default(), main_opts,
+                                     loaded_cf_descs,
+                                     rocksdb_ignore_unknown_options);
+
+  return status;
+}
+
+// Set by rocksdb_done_func(); makes a later rocksdb_init_func() call fail.
+bool prevent_myrocks_loading= false;
+
+
+/*
+  Storage Engine initialization function, invoked when plugin is loaded.
+
+  Fills in the handlerton passed as `p`, opens the RocksDB TransactionDB in
+  rocksdb_datadir, initializes the dictionary / binlog / DDL managers and
+  starts the background, drop-index and manual-compaction threads.
+
+  Returns HA_EXIT_SUCCESS on success. Every failure path logs an error and
+  returns non-zero; the process exits instead when corruption was detected and
+  rocksdb_allow_to_start_after_corruption is off.
+*/
+
+static int rocksdb_init_func(void *const p) {
+
+  DBUG_ENTER_FUNC();
+
+  if (prevent_myrocks_loading)
+  {
+    my_error(ER_INTERNAL_ERROR, MYF(0),
+             "Loading MyRocks plugin after it has been unloaded is not "
+             "supported. Please restart mysqld");
+    DBUG_RETURN(1);
+  }
+
+  if (rdb_check_rocksdb_corruption()) {
+    // NO_LINT_DEBUG
+    sql_print_error(
+        "RocksDB: There was a corruption detected in RockDB files. "
+        "Check error log emitted earlier for more details.");
+    if (rocksdb_allow_to_start_after_corruption) {
+      // NO_LINT_DEBUG
+      sql_print_information(
+          "RocksDB: Remove rocksdb_allow_to_start_after_corruption to prevent "
+          "server operating if RocksDB corruption is detected.");
+    } else {
+      // NO_LINT_DEBUG
+      sql_print_error(
+          "RocksDB: The server will exit normally and stop restart "
+          "attempts. Remove %s file from data directory and "
+          "start mysqld manually.",
+          rdb_corruption_marker_file_name().c_str());
+      exit(0);
+    }
+  }
+
+  // Validate the assumption about the size of ROCKSDB_SIZEOF_HIDDEN_PK_COLUMN.
+  static_assert(sizeof(longlong) == 8, "Assuming that longlong is 8 bytes.");
+
+  init_rocksdb_psi_keys();
+
+  rocksdb_hton = (handlerton *)p;
+
+  rdb_open_tables.init();
+  // Frees rdb_open_tables on every early return below; disarmed with skip()
+  // just before the successful return.
+  Ensure_cleanup rdb_open_tables_cleanup([]() { rdb_open_tables.free(); });
+
+#ifdef HAVE_PSI_INTERFACE
+  rdb_bg_thread.init(rdb_signal_bg_psi_mutex_key, rdb_signal_bg_psi_cond_key);
+  rdb_drop_idx_thread.init(rdb_signal_drop_idx_psi_mutex_key,
+                           rdb_signal_drop_idx_psi_cond_key);
+  rdb_mc_thread.init(rdb_signal_mc_psi_mutex_key, rdb_signal_mc_psi_cond_key);
+#else
+  rdb_bg_thread.init();
+  rdb_drop_idx_thread.init();
+  rdb_mc_thread.init();
+#endif
+  mysql_mutex_init(rdb_collation_data_mutex_key, &rdb_collation_data_mutex,
+                   MY_MUTEX_INIT_FAST);
+  mysql_mutex_init(rdb_mem_cmp_space_mutex_key, &rdb_mem_cmp_space_mutex,
+                   MY_MUTEX_INIT_FAST);
+
+  // Register the RocksDB data directory (without a leading "./") in the
+  // server's ignore-db-dirs list.
+  const char* initial_rocksdb_datadir_for_ignore_dirs= rocksdb_datadir;
+  if (!strncmp(rocksdb_datadir, "./", 2))
+    initial_rocksdb_datadir_for_ignore_dirs += 2;
+  ignore_db_dirs_append(initial_rocksdb_datadir_for_ignore_dirs);
+
+#if defined(HAVE_PSI_INTERFACE)
+  rdb_collation_exceptions =
+      new Regex_list_handler(key_rwlock_collation_exception_list);
+#else
+  rdb_collation_exceptions = new Regex_list_handler();
+#endif
+
+  mysql_mutex_init(rdb_sysvars_psi_mutex_key, &rdb_sysvars_mutex,
+                   MY_MUTEX_INIT_FAST);
+  mysql_mutex_init(rdb_block_cache_resize_mutex_key,
+                   &rdb_block_cache_resize_mutex, MY_MUTEX_INIT_FAST);
+  Rdb_transaction::init_mutex();
+
+  rocksdb_hton->create = rocksdb_create_handler;
+  rocksdb_hton->close_connection = rocksdb_close_connection;
+
+  rocksdb_hton->prepare = rocksdb_prepare;
+  rocksdb_hton->prepare_ordered = NULL;  // Do not need it
+
+  rocksdb_hton->commit_by_xid = rocksdb_commit_by_xid;
+  rocksdb_hton->rollback_by_xid = rocksdb_rollback_by_xid;
+  rocksdb_hton->recover = rocksdb_recover;
+
+  rocksdb_hton->commit_ordered= rocksdb_commit_ordered;
+  rocksdb_hton->commit = rocksdb_commit;
+
+  rocksdb_hton->commit_checkpoint_request= rocksdb_checkpoint_request;
+
+  rocksdb_hton->rollback = rocksdb_rollback;
+  rocksdb_hton->show_status = rocksdb_show_status;
+#ifdef MARIADB_NOT_YET
+  rocksdb_hton->explicit_snapshot = rocksdb_explicit_snapshot;
+#endif
+  rocksdb_hton->start_consistent_snapshot =
+      rocksdb_start_tx_and_assign_read_view;
+#ifdef MARIADB_NOT_YET
+  rocksdb_hton->start_shared_snapshot = rocksdb_start_tx_with_shared_read_view;
+#endif
+  rocksdb_hton->savepoint_set = rocksdb_savepoint;
+  rocksdb_hton->savepoint_rollback = rocksdb_rollback_to_savepoint;
+  rocksdb_hton->savepoint_rollback_can_release_mdl =
+      rocksdb_rollback_to_savepoint_can_release_mdl;
+#ifdef MARIAROCKS_NOT_YET
+  rocksdb_hton->update_table_stats = rocksdb_update_table_stats;
+#endif  // MARIAROCKS_NOT_YET
+
+  /*
+  Not needed in MariaDB:
+  rocksdb_hton->flush_logs = rocksdb_flush_wal;
+  rocksdb_hton->handle_single_table_select = rocksdb_handle_single_table_select;
+
+  */
+
+  rocksdb_hton->flags = HTON_TEMPORARY_NOT_SUPPORTED |
+                        HTON_SUPPORTS_EXTENDED_KEYS | HTON_CAN_RECREATE;
+
+  rocksdb_hton->tablefile_extensions= ha_rocksdb_exts;
+  DBUG_ASSERT(!mysqld_embedded);
+
+  // Cap max_open_files at half of the server's open_files_limit; -2 selects
+  // that same value.
+  if (rocksdb_db_options->max_open_files > (long)open_files_limit) {
+    // NO_LINT_DEBUG
+    sql_print_information(
+        "RocksDB: rocksdb_max_open_files should not be "
+        "greater than the open_files_limit, effective value "
+        "of rocksdb_max_open_files is being set to "
+        "open_files_limit / 2.");
+    rocksdb_db_options->max_open_files = open_files_limit / 2;
+  } else if (rocksdb_db_options->max_open_files == -2) {
+    rocksdb_db_options->max_open_files = open_files_limit / 2;
+  }
+
+#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported
+  rdb_read_free_regex_handler.set_patterns(DEFAULT_READ_FREE_RPL_TABLES);
+#endif
+
+  rocksdb_stats = rocksdb::CreateDBStatistics();
+  rocksdb_stats->set_stats_level(
+      static_cast<rocksdb::StatsLevel>(rocksdb_stats_level));
+  rocksdb_stats_level = rocksdb_stats->get_stats_level();
+  rocksdb_db_options->statistics = rocksdb_stats;
+
+  if (rocksdb_rate_limiter_bytes_per_sec != 0) {
+    rocksdb_rate_limiter.reset(
+        rocksdb::NewGenericRateLimiter(rocksdb_rate_limiter_bytes_per_sec));
+    rocksdb_db_options->rate_limiter = rocksdb_rate_limiter;
+  }
+
+  rocksdb_db_options->delayed_write_rate = rocksdb_delayed_write_rate;
+
+  // Install myrocks_logger as info_log. If RocksDB's own logger could be
+  // created it is handed to myrocks_logger first.
+  std::shared_ptr<Rdb_logger> myrocks_logger = std::make_shared<Rdb_logger>();
+  rocksdb::Status s = rocksdb::CreateLoggerFromOptions(
+      rocksdb_datadir, *rocksdb_db_options, &rocksdb_db_options->info_log);
+  if (s.ok()) {
+    myrocks_logger->SetRocksDBLogger(rocksdb_db_options->info_log);
+  }
+
+  rocksdb_db_options->info_log = myrocks_logger;
+  myrocks_logger->SetInfoLogLevel(
+      static_cast<rocksdb::InfoLogLevel>(rocksdb_info_log_level));
+  rocksdb_db_options->wal_dir = rocksdb_wal_dir;
+
+  rocksdb_db_options->wal_recovery_mode =
+      static_cast<rocksdb::WALRecoveryMode>(rocksdb_wal_recovery_mode);
+
+  rocksdb_db_options->access_hint_on_compaction_start =
+      static_cast<rocksdb::Options::AccessHint>(
+          rocksdb_access_hint_on_compaction_start);
+
+  if (rocksdb_db_options->allow_mmap_reads &&
+      rocksdb_db_options->use_direct_reads) {
+    // allow_mmap_reads implies !use_direct_reads and RocksDB will not open if
+    // mmap_reads and direct_reads are both on.   (NO_LINT_DEBUG)
+    sql_print_error(
+        "RocksDB: Can't enable both use_direct_reads "
+        "and allow_mmap_reads\n");
+    DBUG_RETURN(HA_EXIT_FAILURE);
+  }
+
+  // Check whether the filesystem backing rocksdb_datadir allows O_DIRECT
+  if (rocksdb_db_options->use_direct_reads ||
+      rocksdb_db_options->use_direct_io_for_flush_and_compaction) {
+    rocksdb::EnvOptions soptions;
+    rocksdb::Status check_status;
+    rocksdb::Env *const env = rocksdb_db_options->env;
+
+    std::string fname = format_string("%s/DIRECT_CHECK", rocksdb_datadir);
+    if (env->FileExists(fname).ok()) {
+      // The probe file already exists: try to open it for direct reads.
+      std::unique_ptr<rocksdb::SequentialFile> file;
+      soptions.use_direct_reads = true;
+      check_status = env->NewSequentialFile(fname, &file, soptions);
+    } else {
+      // Otherwise create it with direct writes and remove it again.
+      std::unique_ptr<rocksdb::WritableFile> file;
+      soptions.use_direct_writes = true;
+      check_status = env->ReopenWritableFile(fname, &file, soptions);
+      if (file != nullptr) {
+        file->Close();
+      }
+      env->DeleteFile(fname);
+    }
+
+    if (!check_status.ok()) {
+      // NO_LINT_DEBUG
+      sql_print_error(
+          "RocksDB: Unable to use direct io in rocksdb-datadir:"
+          "(%s)",
+          check_status.getState());
+      DBUG_RETURN(HA_EXIT_FAILURE);
+    }
+  }
+
+  if (rocksdb_db_options->allow_mmap_writes &&
+      rocksdb_db_options->use_direct_io_for_flush_and_compaction) {
+    // See above comment for allow_mmap_reads. (NO_LINT_DEBUG)
+    sql_print_error(
+        "RocksDB: Can't enable both "
+        "use_direct_io_for_flush_and_compaction and "
+        "allow_mmap_writes\n");
+    DBUG_RETURN(HA_EXIT_FAILURE);
+  }
+
+  if (rocksdb_db_options->allow_mmap_writes &&
+      rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) {
+    // NO_LINT_DEBUG
+    sql_print_error(
+        "RocksDB: rocksdb_flush_log_at_trx_commit needs to be 0 "
+        "to use allow_mmap_writes");
+    DBUG_RETURN(HA_EXIT_FAILURE);
+  }
+
+  // sst_file_manager will move deleted rocksdb sst files to trash_dir
+  // to be deleted in a background thread.
+  std::string trash_dir = std::string(rocksdb_datadir) + "/trash";
+  rocksdb_db_options->sst_file_manager.reset(NewSstFileManager(
+      rocksdb_db_options->env, myrocks_logger, trash_dir,
+      rocksdb_sst_mgr_rate_bytes_per_sec, true /* delete_existing_trash */));
+
+  std::vector<std::string> cf_names;
+  rocksdb::Status status;
+  status = rocksdb::DB::ListColumnFamilies(*rocksdb_db_options, rocksdb_datadir,
+                                           &cf_names);
+  if (!status.ok()) {
+    /*
+      When we start on an empty datadir, ListColumnFamilies returns IOError,
+      and RocksDB doesn't provide any way to check what kind of error it was.
+      Checking system errno happens to work right now.
+    */
+    if (status.IsIOError()
+#ifndef _WIN32
+      && errno == ENOENT
+#endif
+      ) {
+      sql_print_information("RocksDB: Got ENOENT when listing column families");
+
+      // NO_LINT_DEBUG
+      sql_print_information(
+          "RocksDB:   assuming that we're creating a new database");
+    } else {
+      rdb_log_status_error(status, "Error listing column families");
+      DBUG_RETURN(HA_EXIT_FAILURE);
+    }
+  } else {
+    // NO_LINT_DEBUG
+    // size() is a size_t; cast it so the argument matches the conversion on
+    // platforms where long is narrower than size_t.
+    sql_print_information("RocksDB: %llu column families found",
+                          (ulonglong)cf_names.size());
+  }
+
+  std::vector<rocksdb::ColumnFamilyDescriptor> cf_descr;
+  std::vector<rocksdb::ColumnFamilyHandle *> cf_handles;
+
+  rocksdb_tbl_options->index_type =
+      (rocksdb::BlockBasedTableOptions::IndexType)rocksdb_index_type;
+
+  if (!rocksdb_tbl_options->no_block_cache) {
+    std::shared_ptr<rocksdb::MemoryAllocator> memory_allocator;
+    if (!rocksdb_cache_dump) {
+      size_t block_size = rocksdb_tbl_options->block_size;
+      rocksdb::JemallocAllocatorOptions alloc_opt;
+      // Limit jemalloc tcache memory usage. The range
+      // [block_size/4, block_size] should be enough to cover most of
+      // block cache allocation sizes.
+      alloc_opt.limit_tcache_size = true;
+      alloc_opt.tcache_size_lower_bound = block_size / 4;
+      alloc_opt.tcache_size_upper_bound = block_size;
+      rocksdb::Status new_alloc_status =
+          rocksdb::NewJemallocNodumpAllocator(alloc_opt, &memory_allocator);
+      if (!new_alloc_status.ok()) {
+        // The no-dump allocator could not be created: log it and abort
+        // initialization.
+        // NOTE(review): this branch used to be commented as "Fallback to use
+        // default malloc/free", which is not what the code does - confirm
+        // whether failing or falling back is the intended behavior.
+        rdb_log_status_error(new_alloc_status,
+                             "Error excluding block cache from core dump");
+        memory_allocator = nullptr;
+        DBUG_RETURN(HA_EXIT_FAILURE);
+      }
+    }
+    std::shared_ptr<rocksdb::Cache> block_cache =
+        rocksdb_use_clock_cache
+            ? rocksdb::NewClockCache(rocksdb_block_cache_size)
+            : rocksdb::NewLRUCache(
+                  rocksdb_block_cache_size, -1 /*num_shard_bits*/,
+                  false /*strict_capacity_limit*/,
+                  rocksdb_cache_high_pri_pool_ratio, memory_allocator);
+    if (rocksdb_sim_cache_size > 0) {
+      // Simulated cache enabled
+      // Wrap block cache inside a simulated cache and pass it to RocksDB
+      rocksdb_tbl_options->block_cache =
+          rocksdb::NewSimCache(block_cache, rocksdb_sim_cache_size, 6);
+    } else {
+      // Pass block cache to RocksDB
+      rocksdb_tbl_options->block_cache = block_cache;
+    }
+  }
+  // Using newer BlockBasedTable format version for better compression
+  // and better memory allocation.
+  // See:
+  // https://github.com/facebook/rocksdb/commit/9ab5adfc59a621d12357580c94451d9f7320c2dd
+  rocksdb_tbl_options->format_version = 2;
+
+  if (rocksdb_collect_sst_properties) {
+    properties_collector_factory =
+        std::make_shared<Rdb_tbl_prop_coll_factory>(&ddl_manager);
+
+    rocksdb_set_compaction_options(nullptr, nullptr, nullptr, nullptr);
+
+    RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex);
+
+    DBUG_ASSERT(rocksdb_table_stats_sampling_pct <=
+                RDB_TBL_STATS_SAMPLE_PCT_MAX);
+    properties_collector_factory->SetTableStatsSamplingPct(
+        rocksdb_table_stats_sampling_pct);
+
+    RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex);
+  }
+
+  if (rocksdb_persistent_cache_size_mb > 0) {
+    std::shared_ptr<rocksdb::PersistentCache> pcache;
+    // Widen before multiplying so the MB -> bytes conversion is carried out
+    // in 64 bits regardless of the variable's declared type.
+    uint64_t cache_size_bytes =
+        static_cast<uint64_t>(rocksdb_persistent_cache_size_mb) * 1024 * 1024;
+    status = rocksdb::NewPersistentCache(
+        rocksdb::Env::Default(), std::string(rocksdb_persistent_cache_path),
+        cache_size_bytes, myrocks_logger, true, &pcache);
+    if (!status.ok()) {
+      // NO_LINT_DEBUG
+      sql_print_error("RocksDB: Persistent cache returned error: (%s)",
+                      status.getState());
+      DBUG_RETURN(HA_EXIT_FAILURE);
+    }
+    rocksdb_tbl_options->persistent_cache = pcache;
+  } else if (strlen(rocksdb_persistent_cache_path)) {
+    // NO_LINT_DEBUG
+    sql_print_error("RocksDB: Must specify rocksdb_persistent_cache_size_mb");
+    DBUG_RETURN(HA_EXIT_FAILURE);
+  }
+
+  std::unique_ptr<Rdb_cf_options> cf_options_map(new Rdb_cf_options());
+  if (!cf_options_map->init(*rocksdb_tbl_options, properties_collector_factory,
+                            rocksdb_default_cf_options,
+                            rocksdb_override_cf_options)) {
+    // NO_LINT_DEBUG
+    sql_print_error("RocksDB: Failed to initialize CF options map.");
+    DBUG_RETURN(HA_EXIT_FAILURE);
+  }
+
+  /*
+    If there are no column families, we're creating the new database.
+    Create one column family named "default".
+  */
+  if (cf_names.size() == 0) cf_names.push_back(DEFAULT_CF_NAME);
+
+  std::vector<size_t> compaction_enabled_cf_indices;
+
+  // NO_LINT_DEBUG
+  sql_print_information("RocksDB: Column Families at start:");
+  for (size_t i = 0; i < cf_names.size(); ++i) {
+    rocksdb::ColumnFamilyOptions opts;
+    cf_options_map->get_cf_options(cf_names[i], &opts);
+
+    // NO_LINT_DEBUG
+    sql_print_information(" cf=%s", cf_names[i].c_str());
+
+    // NO_LINT_DEBUG
+    // write_buffer_size is a size_t; print it through an explicit 64-bit cast.
+    sql_print_information(" write_buffer_size=%llu",
+                          (ulonglong)opts.write_buffer_size);
+
+    // NO_LINT_DEBUG
+    sql_print_information(" target_file_size_base=%" PRIu64,
+                          opts.target_file_size_base);
+
+    /*
+      Temporarily disable compactions to prevent a race condition where
+      compaction starts before compaction filter is ready.
+    */
+    if (!opts.disable_auto_compactions) {
+      compaction_enabled_cf_indices.push_back(i);
+      opts.disable_auto_compactions = true;
+    }
+    cf_descr.push_back(rocksdb::ColumnFamilyDescriptor(cf_names[i], opts));
+  }
+
+  rocksdb::Options main_opts(*rocksdb_db_options,
+                             cf_options_map->get_defaults());
+
+  rocksdb::TransactionDBOptions tx_db_options;
+  tx_db_options.transaction_lock_timeout = 2000;  // 2 seconds
+  tx_db_options.custom_mutex_factory = std::make_shared<Rdb_mutex_factory>();
+  tx_db_options.write_policy =
+      static_cast<rocksdb::TxnDBWritePolicy>(rocksdb_write_policy);
+
+  status =
+      check_rocksdb_options_compatibility(rocksdb_datadir, main_opts, cf_descr);
+
+  // We won't start if we'll determine that there's a chance of data corruption
+  // because of incompatible options.
+  if (!status.ok()) {
+    rdb_log_status_error(
+        status, "Compatibility check against existing database options failed");
+    DBUG_RETURN(HA_EXIT_FAILURE);
+  }
+
+  status = rocksdb::TransactionDB::Open(
+      main_opts, tx_db_options, rocksdb_datadir, cf_descr, &cf_handles, &rdb);
+
+  if (!status.ok()) {
+    rdb_log_status_error(status, "Error opening instance");
+    DBUG_RETURN(HA_EXIT_FAILURE);
+  }
+  cf_manager.init(std::move(cf_options_map), &cf_handles);
+
+  if (dict_manager.init(rdb, &cf_manager)) {
+    // NO_LINT_DEBUG
+    sql_print_error("RocksDB: Failed to initialize data dictionary.");
+    DBUG_RETURN(HA_EXIT_FAILURE);
+  }
+
+  if (binlog_manager.init(&dict_manager)) {
+    // NO_LINT_DEBUG
+    sql_print_error("RocksDB: Failed to initialize binlog manager.");
+    DBUG_RETURN(HA_EXIT_FAILURE);
+  }
+
+  if (ddl_manager.init(&dict_manager, &cf_manager, rocksdb_validate_tables)) {
+    // NO_LINT_DEBUG
+    sql_print_error("RocksDB: Failed to initialize DDL manager.");
+    DBUG_RETURN(HA_EXIT_FAILURE);
+  }
+
+  Rdb_sst_info::init(rdb);
+
+  /*
+    Enable auto compaction, things needed for compaction filter are finished
+    initializing
+  */
+  std::vector<rocksdb::ColumnFamilyHandle *> compaction_enabled_cf_handles;
+  compaction_enabled_cf_handles.reserve(compaction_enabled_cf_indices.size());
+  for (const auto &index : compaction_enabled_cf_indices) {
+    compaction_enabled_cf_handles.push_back(cf_handles[index]);
+  }
+
+  status = rdb->EnableAutoCompaction(compaction_enabled_cf_handles);
+
+  if (!status.ok()) {
+    rdb_log_status_error(status, "Error enabling compaction");
+    DBUG_RETURN(HA_EXIT_FAILURE);
+  }
+
+#ifndef HAVE_PSI_INTERFACE
+  auto err = rdb_bg_thread.create_thread(BG_THREAD_NAME);
+#else
+  auto err = rdb_bg_thread.create_thread(BG_THREAD_NAME,
+                                         rdb_background_psi_thread_key);
+#endif
+  if (err != 0) {
+    // NO_LINT_DEBUG
+    sql_print_error("RocksDB: Couldn't start the background thread: (errno=%d)",
+                    err);
+    DBUG_RETURN(HA_EXIT_FAILURE);
+  }
+
+#ifndef HAVE_PSI_INTERFACE
+  err = rdb_drop_idx_thread.create_thread(INDEX_THREAD_NAME);
+#else
+  err = rdb_drop_idx_thread.create_thread(INDEX_THREAD_NAME,
+                                          rdb_drop_idx_psi_thread_key);
+#endif
+  if (err != 0) {
+    // NO_LINT_DEBUG
+    sql_print_error("RocksDB: Couldn't start the drop index thread: (errno=%d)",
+                    err);
+    DBUG_RETURN(HA_EXIT_FAILURE);
+  }
+
+  err = rdb_mc_thread.create_thread(MANUAL_COMPACTION_THREAD_NAME
+#ifdef HAVE_PSI_INTERFACE
+                                    ,
+                                    rdb_mc_psi_thread_key
+#endif
+  );
+  if (err != 0) {
+    // NO_LINT_DEBUG
+    sql_print_error(
+        "RocksDB: Couldn't start the manual compaction thread: (errno=%d)",
+        err);
+    DBUG_RETURN(HA_EXIT_FAILURE);
+  }
+
+  rdb_set_collation_exception_list(rocksdb_strict_collation_exceptions);
+
+  if (rocksdb_pause_background_work) {
+    rdb->PauseBackgroundWork();
+  }
+
+  // NO_LINT_DEBUG
+  sql_print_information("RocksDB: global statistics using %s indexer",
+                        STRINGIFY_ARG(RDB_INDEXER));
+#if defined(HAVE_SCHED_GETCPU)
+  if (sched_getcpu() == -1) {
+    // NO_LINT_DEBUG
+    sql_print_information(
+        "RocksDB: sched_getcpu() failed - "
+        "global statistics will use thread_id_indexer_t instead");
+  }
+#endif
+
+  err = my_error_register(rdb_get_error_messages, HA_ERR_ROCKSDB_FIRST,
+                          HA_ERR_ROCKSDB_LAST);
+  if (err != 0) {
+    // NO_LINT_DEBUG
+    sql_print_error("RocksDB: Couldn't initialize error messages");
+    DBUG_RETURN(HA_EXIT_FAILURE);
+  }
+
+
+
+  // Creating an instance of HistogramImpl should only happen after RocksDB
+  // has been successfully initialized.
+  commit_latency_stats = new rocksdb::HistogramImpl();
+
+  // Construct a list of directories which will be monitored by I/O watchdog
+  // to make sure that we won't lose write access to them.
+  std::vector<std::string> directories;
+
+  // 1. Data directory.
+  directories.push_back(mysql_real_data_home);
+
+  // 2. Transaction logs.
+  if (myrocks::rocksdb_wal_dir && *myrocks::rocksdb_wal_dir) {
+    directories.push_back(myrocks::rocksdb_wal_dir);
+  }
+
+#if !defined(_WIN32) && !defined(__APPLE__)
+  io_watchdog = new Rdb_io_watchdog(std::move(directories));
+  io_watchdog->reset_timeout(rocksdb_io_write_timeout_secs);
+#endif
+
+  // NO_LINT_DEBUG
+  sql_print_information(
+      "MyRocks storage engine plugin has been successfully "
+      "initialized.");
+
+  // Skip cleaning up rdb_open_tables as we've succeeded
+  rdb_open_tables_cleanup.skip();
+
+  DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+/*
+  Storage Engine deinitialization function, invoked when plugin is unloaded.
+
+  Stops the three helper threads, flushes memtables, cancels RocksDB background
+  work, destroys the mutexes and managers created by rocksdb_init_func() and
+  deletes the database object. Returns 1 if tables were still open when the
+  plugin was unloaded, 0 otherwise.
+*/
+
+static int rocksdb_done_func(void *const p) {
+  DBUG_ENTER_FUNC();
+
+  int error = 0;
+
+  // signal the drop index thread to stop
+  rdb_drop_idx_thread.signal(true);
+
+  // Flush all memtables for not losing data, even if WAL is disabled.
+  rocksdb_flush_all_memtables();
+
+  // Stop all rocksdb background work
+  CancelAllBackgroundWork(rdb->GetBaseDB(), true);
+
+  // Signal the background thread to stop and to persist all stats collected
+  // from background flushes and compactions. This will add more keys to a new
+  // memtable, but since the memtables were just flushed, it should not trigger
+  // a flush that can stall due to background threads being stopped. As long
+  // as these keys are stored in a WAL file, they can be retrieved on restart.
+  rdb_bg_thread.signal(true);
+
+  // Wait for the background thread to finish.
+  auto err = rdb_bg_thread.join();
+  if (err != 0) {
+    // We'll log the message and continue because we're shutting down and
+    // continuation is the optimal strategy.
+    // NO_LINT_DEBUG
+    sql_print_error("RocksDB: Couldn't stop the background thread: (errno=%d)",
+                    err);
+  }
+
+  // Wait for the drop index thread to finish.
+  err = rdb_drop_idx_thread.join();
+  if (err != 0) {
+    // NO_LINT_DEBUG
+    sql_print_error("RocksDB: Couldn't stop the index thread: (errno=%d)", err);
+  }
+
+  // signal the manual compaction thread to stop
+  rdb_mc_thread.signal(true);
+  // Wait for the manual compaction thread to finish.
+  err = rdb_mc_thread.join();
+  if (err != 0) {
+    // NO_LINT_DEBUG
+    sql_print_error(
+        "RocksDB: Couldn't stop the manual compaction thread: (errno=%d)", err);
+  }
+
+  if (rdb_open_tables.count()) {
+    // Looks like we are getting unloaded and yet we have some open tables
+    // left behind.
+    error = 1;
+  }
+
+  rdb_open_tables.free();
+  /*
+    destructors for static objects can be called at _exit(),
+    but we want to free the memory at dlclose()
+  */
+  // MARIADB_MERGE_2019: rdb_open_tables.m_hash.~Rdb_table_set();
+  mysql_mutex_destroy(&rdb_sysvars_mutex);
+  mysql_mutex_destroy(&rdb_block_cache_resize_mutex);
+
+
+  delete rdb_collation_exceptions;
+
+  mysql_mutex_destroy(&rdb_collation_data_mutex);
+  mysql_mutex_destroy(&rdb_mem_cmp_space_mutex);
+
+  Rdb_transaction::term_mutex();
+
+  for (auto &it : rdb_collation_data) {
+    delete it;
+    it = nullptr;
+  }
+
+  ddl_manager.cleanup();
+  binlog_manager.cleanup();
+  dict_manager.cleanup();
+  cf_manager.cleanup();
+
+  delete rdb;
+  rdb = nullptr;
+
+  delete commit_latency_stats;
+  commit_latency_stats = nullptr;
+
+#if !defined(_WIN32) && !defined(__APPLE__)
+  delete io_watchdog;
+  io_watchdog = nullptr;
+#endif
+
+// Disown the cache data since we're shutting down.
+// This results in memory leaks but it improved the shutdown time.
+// Don't disown when running under valgrind
+#ifndef HAVE_valgrind
+  if (rocksdb_tbl_options->block_cache) {
+    rocksdb_tbl_options->block_cache->DisownData();
+  }
+#endif /* HAVE_valgrind */
+
+  /*
+    MariaDB: don't clear rocksdb_db_options and rocksdb_tbl_options.
+    MyRocks' plugin variables refer to them.
+
+    The plugin cannot be loaded again (see prevent_myrocks_loading) but plugin
+    variables are processed before myrocks::rocksdb_init_func is invoked, so
+    they must point to valid memory.
+  */
+  //rocksdb_db_options = nullptr;
+  rocksdb_db_options->statistics = nullptr;
+  //rocksdb_tbl_options = nullptr;
+  rocksdb_stats = nullptr;
+
+  my_free(rocksdb_update_cf_options);
+  rocksdb_update_cf_options = nullptr;
+
+  my_error_unregister(HA_ERR_ROCKSDB_FIRST, HA_ERR_ROCKSDB_LAST);
+
+  /*
+    Prevent loading the plugin after it has been loaded and then unloaded. This
+    doesn't work currently.
+  */
+  prevent_myrocks_loading= true;
+
+  DBUG_RETURN(error);
+}
+
+// Positions `iter` at `key_slice`: SeekForPrev() when scanning backward,
+// Seek() when scanning forward.
+static inline void rocksdb_smart_seek(bool seek_backward,
+                                      rocksdb::Iterator *const iter,
+                                      const rocksdb::Slice &key_slice) {
+  if (seek_backward) {
+    iter->SeekForPrev(key_slice);
+  } else {
+    iter->Seek(key_slice);
+  }
+}
+
+// Advances `iter` by one entry in the scan direction: Prev() when scanning
+// backward, Next() otherwise.
+static inline void rocksdb_smart_next(bool seek_backward,
+                                      rocksdb::Iterator *const iter) {
+  if (seek_backward) {
+    iter->Prev();
+  } else {
+    iter->Next();
+  }
+}
+
+#ifndef DBUG_OFF
+// simulate that RocksDB has reported corrupted data
+static void dbug_change_status_to_corrupted(rocksdb::Status *status) {
+  *status = rocksdb::Status::Corruption();
+}
+#endif
+
+// If the iterator is not valid it might be because of EOF but might be due
+// to IOError or corruption. The good practice is always check it.
+// https://github.com/facebook/rocksdb/wiki/Iterator#error-handling +static inline bool is_valid(rocksdb::Iterator *scan_it) { + if (scan_it->Valid()) { + return true; + } else { + rocksdb::Status s = scan_it->status(); + DBUG_EXECUTE_IF("rocksdb_return_status_corrupted", + dbug_change_status_to_corrupted(&s);); + if (s.IsIOError() || s.IsCorruption()) { + if (s.IsCorruption()) { + rdb_persist_corruption_marker(); + } + rdb_handle_io_error(s, RDB_IO_ERROR_GENERAL); + } + return false; + } +} + +/** + @brief + Example of simple lock controls. The "table_handler" it creates is a + structure we will pass to each ha_rocksdb handler. Do you have to have + one of these? Well, you have pieces that are used for locking, and + they are needed to function. +*/ + +Rdb_table_handler *Rdb_open_tables_map::get_table_handler( + const char *const table_name) { + DBUG_ASSERT(table_name != nullptr); + + Rdb_table_handler *table_handler; + + std::string table_name_str(table_name); + + // First, look up the table in the hash map. + RDB_MUTEX_LOCK_CHECK(m_mutex); + const auto it = m_table_map.find(table_name_str); + if (it != m_table_map.end()) { + // Found it + table_handler = it->second; + } else { + char *tmp_name; + + // Since we did not find it in the hash map, attempt to create and add it + // to the hash map. + if (!(table_handler = reinterpret_cast(my_multi_malloc( + PSI_INSTRUMENT_ME, + MYF(MY_WME | MY_ZEROFILL), &table_handler, sizeof(*table_handler), + &tmp_name, table_name_str.length() + 1, NullS)))) { + // Allocating a new Rdb_table_handler and a new table name failed. 
+ RDB_MUTEX_UNLOCK_CHECK(m_mutex); + return nullptr; + } + + table_handler->m_ref_count = 0; + table_handler->m_table_name_length = table_name_str.length(); + table_handler->m_table_name = tmp_name; + strmov(table_handler->m_table_name, table_name); + + m_table_map.emplace(table_name_str, table_handler); + + thr_lock_init(&table_handler->m_thr_lock); +#ifdef MARIAROCKS_NOT_YET + table_handler->m_io_perf_read.init(); + table_handler->m_io_perf_write.init(); +#endif + } + DBUG_ASSERT(table_handler->m_ref_count >= 0); + table_handler->m_ref_count++; + + RDB_MUTEX_UNLOCK_CHECK(m_mutex); + + return table_handler; +} + +std::vector rdb_get_open_table_names(void) { + return rdb_open_tables.get_table_names(); +} + +std::vector Rdb_open_tables_map::get_table_names(void) const { + const Rdb_table_handler *table_handler; + std::vector names; + + RDB_MUTEX_LOCK_CHECK(m_mutex); + for (const auto &kv : m_table_map) { + table_handler = kv.second; + DBUG_ASSERT(table_handler != nullptr); + names.push_back(table_handler->m_table_name); + } + RDB_MUTEX_UNLOCK_CHECK(m_mutex); + + return names; +} + +/* + Inspired by innobase_get_int_col_max_value from InnoDB. This returns the + maximum value a type can take on. 
+*/ +static ulonglong rdb_get_int_col_max_value(const Field *field) { + ulonglong max_value = 0; + switch (field->key_type()) { + case HA_KEYTYPE_BINARY: + max_value = 0xFFULL; + break; + case HA_KEYTYPE_INT8: + max_value = 0x7FULL; + break; + case HA_KEYTYPE_USHORT_INT: + max_value = 0xFFFFULL; + break; + case HA_KEYTYPE_SHORT_INT: + max_value = 0x7FFFULL; + break; + case HA_KEYTYPE_UINT24: + max_value = 0xFFFFFFULL; + break; + case HA_KEYTYPE_INT24: + max_value = 0x7FFFFFULL; + break; + case HA_KEYTYPE_ULONG_INT: + max_value = 0xFFFFFFFFULL; + break; + case HA_KEYTYPE_LONG_INT: + max_value = 0x7FFFFFFFULL; + break; + case HA_KEYTYPE_ULONGLONG: + max_value = 0xFFFFFFFFFFFFFFFFULL; + break; + case HA_KEYTYPE_LONGLONG: + max_value = 0x7FFFFFFFFFFFFFFFULL; + break; + case HA_KEYTYPE_FLOAT: + max_value = 0x1000000ULL; + break; + case HA_KEYTYPE_DOUBLE: + max_value = 0x20000000000000ULL; + break; + default: + abort(); + } + + return max_value; +} + +void ha_rocksdb::load_auto_incr_value() { + ulonglong auto_incr = 0; + bool validate_last = false, use_datadic = true; +#ifndef DBUG_OFF + DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", use_datadic = false;); + validate_last = true; +#endif + + if (use_datadic && dict_manager.get_auto_incr_val( + m_tbl_def->get_autoincr_gl_index_id(), &auto_incr)) { + update_auto_incr_val(auto_incr); + } + + // If we find nothing in the data dictionary, or if we are in debug mode, + // then call index_last to get the last value. + // + // This is needed when upgrading from a server that did not support + // persistent auto_increment, of if the table is empty. + // + // For debug mode, we are just verifying that the data dictionary value is + // greater than or equal to the maximum value in the table. + if (auto_incr == 0 || validate_last) { + auto_incr = load_auto_incr_value_from_index(); + update_auto_incr_val(auto_incr); + } + + // If we failed to find anything from the data dictionary and index, then + // initialize auto_increment to 1. 
+ if (m_tbl_def->m_auto_incr_val == 0) { + update_auto_incr_val(1); + } +} + +ulonglong ha_rocksdb::load_auto_incr_value_from_index() { + const int save_active_index = active_index; + active_index = table->s->next_number_index; + const uint8 save_table_status = table->status; + ulonglong last_val = 0; + + Rdb_transaction *const tx = get_or_create_tx(table->in_use); + const bool is_new_snapshot = !tx->has_snapshot(); + if (is_new_snapshot) { + tx->acquire_snapshot(true); + } + + // Do a lookup. We only need index column, so it should be index-only. + // (another reason to make it index-only is that table->read_set is not set + // appropriately and non-index-only lookup will not read the value) + const bool save_keyread_only = m_keyread_only; + m_keyread_only = true; + m_converter->set_is_key_requested(true); + + if (!index_last(table->record[0])) { + Field *field = + table->key_info[table->s->next_number_index].key_part[0].field; + ulonglong max_val = rdb_get_int_col_max_value(field); + MY_BITMAP *const old_map = dbug_tmp_use_all_columns(table, &table->read_set); + last_val = field->val_int(); + if (last_val != max_val) { + last_val++; + } +#ifndef DBUG_OFF + ulonglong dd_val; + if (last_val <= max_val) { + const auto &gl_index_id = m_tbl_def->get_autoincr_gl_index_id(); + if (dict_manager.get_auto_incr_val(gl_index_id, &dd_val) && + tx->get_auto_incr(gl_index_id) == 0) { + DBUG_ASSERT(dd_val >= last_val); + } + } +#endif + dbug_tmp_restore_column_map(&table->read_set, old_map); + } + + m_keyread_only = save_keyread_only; + if (is_new_snapshot) { + tx->release_snapshot(); + } + + table->status = save_table_status; + active_index = save_active_index; + + /* + Do what ha_rocksdb::index_end() does. + (Why don't we use index_init/index_end? class handler defines index_init + as private, for some reason). 
+ */ + release_scan_iterator(); + + return last_val; +} + +void ha_rocksdb::update_auto_incr_val(ulonglong val) { + ulonglong auto_incr_val = m_tbl_def->m_auto_incr_val; + while ( + auto_incr_val < val && + !m_tbl_def->m_auto_incr_val.compare_exchange_weak(auto_incr_val, val)) { + // Do nothing - just loop until auto_incr_val is >= val or we successfully + // set it + } +} + +void ha_rocksdb::update_auto_incr_val_from_field() { + Field *field; + ulonglong new_val, max_val; + field = table->key_info[table->s->next_number_index].key_part[0].field; + max_val = rdb_get_int_col_max_value(field); + + MY_BITMAP *const old_map = + dbug_tmp_use_all_columns(table, &table->read_set); + new_val = field->val_int(); + // don't increment if we would wrap around + if (new_val != max_val) { + new_val++; + } + + dbug_tmp_restore_column_map(&table->read_set, old_map); + + // Only update if positive value was set for auto_incr column. + if (new_val <= max_val) { + Rdb_transaction *const tx = get_or_create_tx(table->in_use); + tx->set_auto_incr(m_tbl_def->get_autoincr_gl_index_id(), new_val); + + // Update the in memory auto_incr value in m_tbl_def. + update_auto_incr_val(new_val); + } +} + +int ha_rocksdb::load_hidden_pk_value() { + const int save_active_index = active_index; + active_index = m_tbl_def->m_key_count - 1; + const uint8 save_table_status = table->status; + + Rdb_transaction *const tx = get_or_create_tx(table->in_use); + const bool is_new_snapshot = !tx->has_snapshot(); + + longlong hidden_pk_id = 1; + // Do a lookup. 
+ if (!index_last(table->record[0])) { + /* + Decode PK field from the key + */ + auto err = read_hidden_pk_id_from_rowkey(&hidden_pk_id); + if (err) { + if (is_new_snapshot) { + tx->release_snapshot(); + } + return err; + } + + hidden_pk_id++; + } + + longlong old = m_tbl_def->m_hidden_pk_val; + while (old < hidden_pk_id && + !m_tbl_def->m_hidden_pk_val.compare_exchange_weak(old, hidden_pk_id)) { + } + + if (is_new_snapshot) { + tx->release_snapshot(); + } + + table->status = save_table_status; + active_index = save_active_index; + + release_scan_iterator(); + + return HA_EXIT_SUCCESS; +} + +/* Get PK value from m_tbl_def->m_hidden_pk_info. */ +longlong ha_rocksdb::update_hidden_pk_val() { + DBUG_ASSERT(has_hidden_pk(table)); + const longlong new_val = m_tbl_def->m_hidden_pk_val++; + return new_val; +} + +/* Get the id of the hidden pk id from m_last_rowkey */ +int ha_rocksdb::read_hidden_pk_id_from_rowkey(longlong *const hidden_pk_id) { + DBUG_ASSERT(table != nullptr); + DBUG_ASSERT(has_hidden_pk(table)); + + rocksdb::Slice rowkey_slice(m_last_rowkey.ptr(), m_last_rowkey.length()); + + // Get hidden primary key from old key slice + Rdb_string_reader reader(&rowkey_slice); + if ((!reader.read(Rdb_key_def::INDEX_NUMBER_SIZE))) { + return HA_ERR_ROCKSDB_CORRUPT_DATA; + } + + const int length= 8; /* was Field_longlong::PACK_LENGTH in FB MySQL tree */ + const uchar *from = reinterpret_cast(reader.read(length)); + if (from == nullptr) { + /* Mem-comparable image doesn't have enough bytes */ + return HA_ERR_ROCKSDB_CORRUPT_DATA; + } + + *hidden_pk_id = rdb_netbuf_read_uint64(&from); + return HA_EXIT_SUCCESS; +} + +/** + @brief + Free lock controls. We call this whenever we close a table. If the table had + the last reference to the table_handler, then we free the memory associated + with it. 
+*/ + +void Rdb_open_tables_map::release_table_handler( + Rdb_table_handler *const table_handler) { + RDB_MUTEX_LOCK_CHECK(m_mutex); + + DBUG_ASSERT(table_handler != nullptr); + DBUG_ASSERT(table_handler->m_ref_count > 0); + if (!--table_handler->m_ref_count) { + // Last reference was released. Tear down the hash entry. + const auto ret MY_ATTRIBUTE((__unused__)) = + m_table_map.erase(std::string(table_handler->m_table_name)); + DBUG_ASSERT(ret == 1); // the hash entry must actually be found and deleted + my_core::thr_lock_delete(&table_handler->m_thr_lock); + my_free(table_handler); + } + + RDB_MUTEX_UNLOCK_CHECK(m_mutex); +} + +static handler *rocksdb_create_handler(my_core::handlerton *const hton, + my_core::TABLE_SHARE *const table_arg, + my_core::MEM_ROOT *const mem_root) { + return new (mem_root) ha_rocksdb(hton, table_arg); +} + +ha_rocksdb::ha_rocksdb(my_core::handlerton *const hton, + my_core::TABLE_SHARE *const table_arg) + : handler(hton, table_arg), + m_table_handler(nullptr), + m_scan_it(nullptr), + m_scan_it_skips_bloom(false), + m_scan_it_snapshot(nullptr), + m_scan_it_lower_bound(nullptr), + m_scan_it_upper_bound(nullptr), + m_tbl_def(nullptr), + m_pk_descr(nullptr), + m_key_descr_arr(nullptr), + m_pk_can_be_decoded(false), + m_pk_tuple(nullptr), + m_pk_packed_tuple(nullptr), + m_sk_packed_tuple(nullptr), + m_end_key_packed_tuple(nullptr), + m_sk_match_prefix(nullptr), + m_sk_match_prefix_buf(nullptr), + m_sk_packed_tuple_old(nullptr), + m_dup_sk_packed_tuple(nullptr), + m_dup_sk_packed_tuple_old(nullptr), + m_pack_buffer(nullptr), + m_record_buffer(nullptr), + m_lock_rows(RDB_LOCK_NONE), + m_keyread_only(false), + m_insert_with_update(false), + m_dup_pk_found(false), + m_in_rpl_delete_rows(false), + m_in_rpl_update_rows(false), + m_force_skip_unique_check(false) {} + + +const std::string &ha_rocksdb::get_table_basename() const { + return m_tbl_def->base_tablename(); +} + +/** + @return + false OK + other Error inpacking the data +*/ +bool 
ha_rocksdb::init_with_fields() { + DBUG_ENTER_FUNC(); + + const uint pk = table_share->primary_key; + if (pk != MAX_KEY) { + const uint key_parts = table_share->key_info[pk].user_defined_key_parts; + check_keyread_allowed(pk /*PK*/, key_parts - 1, true); + } else { + m_pk_can_be_decoded = false; + } + cached_table_flags = table_flags(); + + DBUG_RETURN(false); /* Ok */ +} + +/* + If the key is a TTL key, we may need to filter it out. + + The purpose of read filtering for tables with TTL is to ensure that + during a transaction a key which has expired already but not removed by + compaction yet is not returned to the user. + + Without this the user might be hit with problems such as disappearing + rows within a transaction, etc, because the compaction filter ignores + snapshots when filtering keys. +*/ +bool ha_rocksdb::should_hide_ttl_rec(const Rdb_key_def &kd, + const rocksdb::Slice &ttl_rec_val, + const int64_t curr_ts) { + DBUG_ASSERT(kd.has_ttl()); + DBUG_ASSERT(kd.m_ttl_rec_offset != UINT_MAX); + + /* + Curr_ts can only be 0 if there are no snapshots open. + should_hide_ttl_rec can only be called when there is >=1 snapshots, unless + we are filtering on the write path (single INSERT/UPDATE) in which case + we are passed in the current time as curr_ts. + + In the event curr_ts is 0, we always decide not to filter the record. We + also log a warning and increment a diagnostic counter. + */ + if (curr_ts == 0) { + update_row_stats(ROWS_HIDDEN_NO_SNAPSHOT); + return false; + } + + if (!rdb_is_ttl_read_filtering_enabled() || !rdb_is_ttl_enabled()) { + return false; + } + + Rdb_string_reader reader(&ttl_rec_val); + + /* + Find where the 8-byte ttl is for each record in this index. + */ + uint64 ts; + if (!reader.read(kd.m_ttl_rec_offset) || reader.read_uint64(&ts)) { + /* + This condition should never be reached since all TTL records have an + 8 byte ttl field in front. Don't filter the record out, and log an error. 
+ */ + std::string buf; + buf = rdb_hexdump(ttl_rec_val.data(), ttl_rec_val.size(), + RDB_MAX_HEXDUMP_LEN); + const GL_INDEX_ID gl_index_id = kd.get_gl_index_id(); + // NO_LINT_DEBUG + sql_print_error( + "Decoding ttl from PK value failed, " + "for index (%u,%u), val: %s", + gl_index_id.cf_id, gl_index_id.index_id, buf.c_str()); + DBUG_ASSERT(0); + return false; + } + + /* Hide record if it has expired before the current snapshot time. */ + uint64 read_filter_ts = 0; +#ifndef DBUG_OFF + read_filter_ts += rdb_dbug_set_ttl_read_filter_ts(); +#endif + bool is_hide_ttl = + ts + kd.m_ttl_duration + read_filter_ts <= static_cast(curr_ts); + if (is_hide_ttl) { + update_row_stats(ROWS_FILTERED); + + /* increment examined row count when rows are skipped */ + THD *thd = ha_thd(); + thd->inc_examined_row_count(1); + DEBUG_SYNC(thd, "rocksdb.ttl_rows_examined"); + } + return is_hide_ttl; +} + +int ha_rocksdb::rocksdb_skip_expired_records(const Rdb_key_def &kd, + rocksdb::Iterator *const iter, + bool seek_backward) { + if (kd.has_ttl()) { + THD *thd = ha_thd(); + while (iter->Valid() && + should_hide_ttl_rec( + kd, iter->value(), + get_or_create_tx(table->in_use)->m_snapshot_timestamp)) { + DEBUG_SYNC(thd, "rocksdb.check_flags_ser"); + if (thd && thd->killed) { + return HA_ERR_QUERY_INTERRUPTED; + } + rocksdb_smart_next(seek_backward, iter); + } + } + return HA_EXIT_SUCCESS; +} + +#ifndef DBUG_OFF +void dbug_append_garbage_at_end(rocksdb::PinnableSlice *on_disk_rec) { + std::string str(on_disk_rec->data(), on_disk_rec->size()); + on_disk_rec->Reset(); + str.append("abc"); + on_disk_rec->PinSelf(rocksdb::Slice(str)); +} + +void dbug_truncate_record(rocksdb::PinnableSlice *on_disk_rec) { + on_disk_rec->remove_suffix(on_disk_rec->size()); +} + +void dbug_modify_rec_varchar12(rocksdb::PinnableSlice *on_disk_rec) { + std::string res; + // The record is NULL-byte followed by VARCHAR(10). + // Put the NULL-byte + res.append("\0", 1); + // Then, add a valid VARCHAR(12) value. 
+ res.append("\xC", 1); + res.append("123456789ab", 12); + + on_disk_rec->Reset(); + on_disk_rec->PinSelf(rocksdb::Slice(res)); +} + +void dbug_create_err_inplace_alter() { + my_printf_error(ER_UNKNOWN_ERROR, + "Intentional failure in inplace alter occurred.", MYF(0)); +} +#endif + +int ha_rocksdb::convert_record_from_storage_format( + const rocksdb::Slice *const key, uchar *const buf) { + DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read1", + dbug_append_garbage_at_end(&m_retrieved_record);); + DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read2", + dbug_truncate_record(&m_retrieved_record);); + DBUG_EXECUTE_IF("myrocks_simulate_bad_row_read3", + dbug_modify_rec_varchar12(&m_retrieved_record);); + + return convert_record_from_storage_format(key, &m_retrieved_record, buf); +} + +/* + @brief + Unpack the record in this->m_retrieved_record and this->m_last_rowkey from + storage format into buf (which can be table->record[0] or table->record[1]). + + @param key Table record's key in mem-comparable form. + @param buf Store record in table->record[0] format here + + @detail + If the table has blobs, the unpacked data in buf may keep pointers to the + data in this->m_retrieved_record. + + The key is only needed to check its checksum value (the checksum is in + m_retrieved_record). + + @seealso + rdb_converter::setup_read_decoders() Sets up data structures which tell + which columns to decode. 
+ + @return + 0 OK + other Error inpacking the data +*/ + +int ha_rocksdb::convert_record_from_storage_format( + const rocksdb::Slice *const key, const rocksdb::Slice *const value, + uchar *const buf) { + return m_converter->decode(m_pk_descr, buf, key, value); +} + +int ha_rocksdb::alloc_key_buffers(const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg, + bool alloc_alter_buffers) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(m_pk_tuple == nullptr); + + std::shared_ptr *const kd_arr = tbl_def_arg->m_key_descr_arr; + + uint key_len = 0; + uint max_packed_sk_len = 0; + uint pack_key_len = 0; + uint record_len = table->s->reclength + table->s->null_bytes; + + m_pk_descr = kd_arr[pk_index(table_arg, tbl_def_arg)]; + if (has_hidden_pk(table_arg)) { + m_pk_key_parts = 1; + } else { + m_pk_key_parts = + table->key_info[table->s->primary_key].user_defined_key_parts; + key_len = table->key_info[table->s->primary_key].key_length; + } + + // move this into get_table_handler() ?? + m_pk_descr->setup(table_arg, tbl_def_arg); + + m_pk_tuple = reinterpret_cast(my_malloc(PSI_INSTRUMENT_ME, key_len, MYF(0))); + + pack_key_len = m_pk_descr->max_storage_fmt_length(); + m_pk_packed_tuple = + reinterpret_cast(my_malloc(PSI_INSTRUMENT_ME, pack_key_len, MYF(0))); + + /* Sometimes, we may use m_sk_packed_tuple for storing packed PK */ + max_packed_sk_len = pack_key_len; + for (uint i = 0; i < table_arg->s->keys; i++) { + /* Primary key was processed above */ + if (i == table_arg->s->primary_key) continue; + + // TODO: move this into get_table_handler() ?? 
+ kd_arr[i]->setup(table_arg, tbl_def_arg); + + const uint packed_len = kd_arr[i]->max_storage_fmt_length(); + if (packed_len > max_packed_sk_len) { + max_packed_sk_len = packed_len; + } + } + + m_sk_packed_tuple = + reinterpret_cast(my_malloc(PSI_INSTRUMENT_ME, max_packed_sk_len, MYF(0))); + m_sk_match_prefix_buf = + reinterpret_cast(my_malloc(PSI_INSTRUMENT_ME, max_packed_sk_len, MYF(0))); + m_sk_packed_tuple_old = + reinterpret_cast(my_malloc(PSI_INSTRUMENT_ME, max_packed_sk_len, MYF(0))); + m_end_key_packed_tuple = + reinterpret_cast(my_malloc(PSI_INSTRUMENT_ME, max_packed_sk_len, MYF(0))); + m_pack_buffer = + reinterpret_cast(my_malloc(PSI_INSTRUMENT_ME, max_packed_sk_len, MYF(0))); + m_record_buffer = + reinterpret_cast(my_malloc(PSI_INSTRUMENT_ME, record_len, MYF(0))); + + m_scan_it_lower_bound = + reinterpret_cast(my_malloc(PSI_INSTRUMENT_ME, max_packed_sk_len, MYF(0))); + m_scan_it_upper_bound = + reinterpret_cast(my_malloc(PSI_INSTRUMENT_ME, max_packed_sk_len, MYF(0))); + + /* + If inplace alter is happening, allocate special buffers for unique + secondary index duplicate checking. + */ + if (alloc_alter_buffers) { + m_dup_sk_packed_tuple = + reinterpret_cast(my_malloc(PSI_INSTRUMENT_ME, max_packed_sk_len, MYF(0))); + m_dup_sk_packed_tuple_old = + reinterpret_cast(my_malloc(PSI_INSTRUMENT_ME, max_packed_sk_len, MYF(0))); + } + + if (m_pk_tuple == nullptr || m_pk_packed_tuple == nullptr || + m_sk_packed_tuple == nullptr || m_sk_packed_tuple_old == nullptr || + m_end_key_packed_tuple == nullptr || m_pack_buffer == nullptr || + m_scan_it_upper_bound == nullptr || m_scan_it_lower_bound == nullptr || + m_record_buffer == nullptr || + (alloc_alter_buffers && (m_dup_sk_packed_tuple == nullptr || + m_dup_sk_packed_tuple_old == nullptr))) { + // One or more of the above allocations failed. 
Clean up and exit + free_key_buffers(); + + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + + DBUG_RETURN(HA_EXIT_SUCCESS); +} + +void ha_rocksdb::free_key_buffers() { + my_free(m_pk_tuple); + m_pk_tuple = nullptr; + + my_free(m_pk_packed_tuple); + m_pk_packed_tuple = nullptr; + + my_free(m_sk_packed_tuple); + m_sk_packed_tuple = nullptr; + + my_free(m_sk_match_prefix_buf); + m_sk_match_prefix_buf = nullptr; + + my_free(m_sk_packed_tuple_old); + m_sk_packed_tuple_old = nullptr; + + my_free(m_end_key_packed_tuple); + m_end_key_packed_tuple = nullptr; + + my_free(m_pack_buffer); + m_pack_buffer = nullptr; + + my_free(m_record_buffer); + m_record_buffer = nullptr; + + my_free(m_dup_sk_packed_tuple); + m_dup_sk_packed_tuple = nullptr; + + my_free(m_dup_sk_packed_tuple_old); + m_dup_sk_packed_tuple_old = nullptr; + + my_free(m_scan_it_lower_bound); + m_scan_it_lower_bound = nullptr; + + my_free(m_scan_it_upper_bound); + m_scan_it_upper_bound = nullptr; +} + +#ifdef MARIAROCKS_NOT_YET +#endif +void ha_rocksdb::set_skip_unique_check_tables(const char *const whitelist) { + const char *const wl = + whitelist ? 
whitelist : DEFAULT_SKIP_UNIQUE_CHECK_TABLES; + +#if defined(HAVE_PSI_INTERFACE) + Regex_list_handler regex_handler(key_rwlock_skip_unique_check_tables); +#else + Regex_list_handler regex_handler; +#endif + + if (!regex_handler.set_patterns(wl)) { + warn_about_bad_patterns(®ex_handler, "skip_unique_check_tables"); + } + + m_skip_unique_check = regex_handler.matches(m_tbl_def->base_tablename()); +} + +/** + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::open(const char *const name, int mode, uint test_if_locked) { + DBUG_ENTER_FUNC(); + + int err = close(); + if (err) { + DBUG_RETURN(err); + } + + m_table_handler = rdb_open_tables.get_table_handler(name); + + if (m_table_handler == nullptr) { + DBUG_RETURN(HA_ERR_OUT_OF_MEM); + } + + my_core::thr_lock_data_init(&m_table_handler->m_thr_lock, &m_db_lock, + nullptr); + m_io_perf.init(&m_table_handler->m_table_perf_context, + &m_table_handler->m_io_perf_read, + &m_table_handler->m_io_perf_write, &stats); + Rdb_perf_context_guard guard(&m_io_perf, + rocksdb_perf_context_level(ha_thd())); + + std::string fullname; + err = rdb_normalize_tablename(name, &fullname); + if (err != HA_EXIT_SUCCESS) { + DBUG_RETURN(err); + } + + m_tbl_def = ddl_manager.find(fullname); + if (m_tbl_def == nullptr) { + my_error(ER_INTERNAL_ERROR, MYF(0), + "Attempt to open a table that is not present in RocksDB-SE data " + "dictionary"); + DBUG_RETURN(HA_ERR_ROCKSDB_INVALID_TABLE); + } + + m_lock_rows = RDB_LOCK_NONE; + m_key_descr_arr = m_tbl_def->m_key_descr_arr; + + /* + Full table scan actually uses primary key + (UPDATE needs to know this, otherwise it will go into infinite loop on + queries like "UPDATE tbl SET pk=pk+100") + */ + key_used_on_scan = table->s->primary_key; + + // close() above has already called free_key_buffers(). No need to do it here. 
+ err = alloc_key_buffers(table, m_tbl_def); + + if (err) { + DBUG_RETURN(err); + } + + /* + init_with_fields() is used to initialize table flags based on the field + definitions in table->field[]. + It is called by open_binary_frm(), but that function calls the method for + a temporary ha_rocksdb object which is later destroyed. + + If we are here in ::open(), then init_with_fields() has not been called + for this object. Call it ourselves, we want all member variables to be + properly initialized. + */ + init_with_fields(); + + /* Initialize decoder */ + m_converter = std::make_shared(ha_thd(), m_tbl_def, table); + + /* + Update m_ttl_bytes address to same as Rdb_converter's m_ttl_bytes. + Remove this code after moving convert_record_to_storage_format() into + Rdb_converter class. + */ + m_ttl_bytes = m_converter->get_ttl_bytes_buffer(); + + /* + MariaDB: adjust field->part_of_key for PK columns. We can only do it here + because SE API is just relying on the HA_PRIMARY_KEY_IN_READ_INDEX which + does not allow to distinguish between unpack'able and non-unpack'able + columns. + Upstream uses handler->init_with_fields() but we don't have that call. 
+ */ + { + if (!has_hidden_pk(table)) { + KEY *const pk_info = &table->key_info[table->s->primary_key]; + for (uint kp = 0; kp < pk_info->user_defined_key_parts; kp++) { + if (!m_pk_descr->can_unpack(kp)) { + // + uint field_index= pk_info->key_part[kp].field->field_index; + table->field[field_index]->part_of_key.clear_all(); + table->field[field_index]->part_of_key.set_bit(table->s->primary_key); + } + } + } + + for (uint key= 0; key < table->s->keys; key++) { + KEY *const key_info = &table->key_info[key]; + if (key == table->s->primary_key) + continue; + for (uint kp = 0; kp < key_info->usable_key_parts; kp++) { + uint field_index= key_info->key_part[kp].field->field_index; + if (m_key_descr_arr[key]->can_unpack(kp)) { + table->field[field_index]->part_of_key.set_bit(key); + } else { + table->field[field_index]->part_of_key.clear_bit(key); + } + } + } + } + + info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST); + + /* + The following load_XXX code calls row decode functions, and they do + that without having done ::external_lock() or index_init()/rnd_init(). + (Note: this also means we're doing a read when there was no + rdb_converter::setup_field_encoders() call) + + Initialize the necessary variables for them: + */ + + /* Load auto_increment value only once on first use. */ + if (table->found_next_number_field && m_tbl_def->m_auto_incr_val == 0) { + load_auto_incr_value(); + } + + /* Load hidden pk only once on first use. 
*/ + if (has_hidden_pk(table) && m_tbl_def->m_hidden_pk_val == 0 && + (err = load_hidden_pk_value()) != HA_EXIT_SUCCESS) { + free_key_buffers(); + DBUG_RETURN(err); + } + + /* Index block size in MyRocks: used by MySQL in query optimization */ + stats.block_size = rocksdb_tbl_options->block_size; + +#ifdef MARIAROCKS_NOT_YET // MDEV-10976 +#endif + /* Determine at open whether we should skip unique checks for this table */ + set_skip_unique_check_tables(THDVAR(ha_thd(), skip_unique_check_tables)); + + DBUG_RETURN(HA_EXIT_SUCCESS); +} + +int ha_rocksdb::close(void) { + DBUG_ENTER_FUNC(); + + m_pk_descr = nullptr; + m_key_descr_arr = nullptr; + m_converter = nullptr; + free_key_buffers(); + + if (m_table_handler != nullptr) { + rdb_open_tables.release_table_handler(m_table_handler); + m_table_handler = nullptr; + } + + // These are needed to suppress valgrind errors in rocksdb.partition + m_last_rowkey.free(); + m_sk_tails.free(); + m_sk_tails_old.free(); + m_pk_unpack_info.free(); + + DBUG_RETURN(HA_EXIT_SUCCESS); +} + +static const char *rdb_error_messages[] = { + "Table must have a PRIMARY KEY.", + "Specifying DATA DIRECTORY for an individual table is not supported.", + "Specifying INDEX DIRECTORY for an individual table is not supported.", + "RocksDB commit failed.", + "Failure during bulk load operation.", + "Found data corruption.", + "CRC checksum mismatch.", + "Invalid table.", + "Could not access RocksDB properties.", + "File I/O error during merge/sort operation.", + "RocksDB status: not found.", + "RocksDB status: corruption.", + "RocksDB status: invalid argument.", + "RocksDB status: io error.", + "RocksDB status: no space.", + "RocksDB status: merge in progress.", + "RocksDB status: incomplete.", + "RocksDB status: shutdown in progress.", + "RocksDB status: timed out.", + "RocksDB status: aborted.", + "RocksDB status: lock limit reached.", + "RocksDB status: busy.", + "RocksDB status: deadlock.", + "RocksDB status: expired.", + "RocksDB status: try 
again.", +}; + +static_assert((sizeof(rdb_error_messages) / sizeof(rdb_error_messages[0])) == + ((HA_ERR_ROCKSDB_LAST - HA_ERR_ROCKSDB_FIRST) + 1), + "Number of error messages doesn't match number of error codes"); + +//psergey-merge: do we need this in MariaDB: we have get_error_messages +//below... +#if 0 +static const char *rdb_get_error_message(int nr) { + return rdb_error_messages[nr - HA_ERR_ROCKSDB_FIRST]; +} +#endif + +static const char **rdb_get_error_messages(int nr) { return rdb_error_messages; } + +bool ha_rocksdb::get_error_message(const int error, String *const buf) { + DBUG_ENTER_FUNC(); + + static_assert(HA_ERR_ROCKSDB_LAST > HA_ERR_FIRST, + "HA_ERR_ROCKSDB_LAST > HA_ERR_FIRST"); + static_assert(HA_ERR_ROCKSDB_LAST > HA_ERR_LAST, + "HA_ERR_ROCKSDB_LAST > HA_ERR_LAST"); + + if (error == HA_ERR_LOCK_WAIT_TIMEOUT || error == HA_ERR_LOCK_DEADLOCK || + error == HA_ERR_ROCKSDB_STATUS_BUSY) { + Rdb_transaction *const tx = get_tx_from_thd(ha_thd()); + DBUG_ASSERT(tx != nullptr); + buf->append(tx->m_detailed_error); + DBUG_RETURN(true); + } + + if (error >= HA_ERR_ROCKSDB_FIRST && error <= HA_ERR_ROCKSDB_LAST) { + buf->append(rdb_error_messages[error - HA_ERR_ROCKSDB_FIRST]); + } + + // We can be called with the values which are < HA_ERR_FIRST because most + // MySQL internal functions will just return HA_EXIT_FAILURE in case of + // an error. + + DBUG_RETURN(false); +} + +/* + Generalized way to convert RocksDB status errors into MySQL error code, and + print error message. 
+ + Each error code below maps to a RocksDB status code found in: + rocksdb/include/rocksdb/status.h +*/ +int ha_rocksdb::rdb_error_to_mysql(const rocksdb::Status &s, + const char *opt_msg) { + DBUG_ASSERT(!s.ok()); + + int err; + switch (s.code()) { + case rocksdb::Status::Code::kOk: + err = HA_EXIT_SUCCESS; + break; + case rocksdb::Status::Code::kNotFound: + err = HA_ERR_ROCKSDB_STATUS_NOT_FOUND; + break; + case rocksdb::Status::Code::kCorruption: + err = HA_ERR_ROCKSDB_STATUS_CORRUPTION; + break; + case rocksdb::Status::Code::kNotSupported: + err = HA_ERR_ROCKSDB_STATUS_NOT_SUPPORTED; + break; + case rocksdb::Status::Code::kInvalidArgument: + err = HA_ERR_ROCKSDB_STATUS_INVALID_ARGUMENT; + break; + case rocksdb::Status::Code::kIOError: + err = (s.IsNoSpace()) ? HA_ERR_ROCKSDB_STATUS_NO_SPACE + : HA_ERR_ROCKSDB_STATUS_IO_ERROR; + break; + case rocksdb::Status::Code::kMergeInProgress: + err = HA_ERR_ROCKSDB_STATUS_MERGE_IN_PROGRESS; + break; + case rocksdb::Status::Code::kIncomplete: + err = HA_ERR_ROCKSDB_STATUS_INCOMPLETE; + break; + case rocksdb::Status::Code::kShutdownInProgress: + err = HA_ERR_ROCKSDB_STATUS_SHUTDOWN_IN_PROGRESS; + break; + case rocksdb::Status::Code::kTimedOut: + err = HA_ERR_ROCKSDB_STATUS_TIMED_OUT; + break; + case rocksdb::Status::Code::kAborted: + err = (s.IsLockLimit()) ? HA_ERR_ROCKSDB_STATUS_LOCK_LIMIT + : HA_ERR_ROCKSDB_STATUS_ABORTED; + break; + case rocksdb::Status::Code::kBusy: + err = (s.IsDeadlock()) ? 
HA_ERR_ROCKSDB_STATUS_DEADLOCK + : HA_ERR_ROCKSDB_STATUS_BUSY; + break; + case rocksdb::Status::Code::kExpired: + err = HA_ERR_ROCKSDB_STATUS_EXPIRED; + break; + case rocksdb::Status::Code::kTryAgain: + err = HA_ERR_ROCKSDB_STATUS_TRY_AGAIN; + break; + default: + DBUG_ASSERT(0); + return -1; + } + + std::string errMsg; + if (s.IsLockLimit()) { + errMsg = + "Operation aborted: Failed to acquire lock due to " + "rocksdb_max_row_locks limit"; + } else { + errMsg = s.ToString(); + } + + if (opt_msg) { + std::string concatenated_error = errMsg + " (" + std::string(opt_msg) + ")"; + my_error(ER_GET_ERRMSG, MYF(0), s.code(), concatenated_error.c_str(), + rocksdb_hton_name); + } else { + my_error(ER_GET_ERRMSG, MYF(0), s.code(), errMsg.c_str(), + rocksdb_hton_name); + } + + return err; +} + +/* MyRocks supports only the following collations for indexed columns */ +static const std::set RDB_INDEX_COLLATIONS = { + COLLATION_BINARY, COLLATION_UTF8_BIN, COLLATION_LATIN1_BIN}; + +static bool rdb_is_index_collation_supported( + const my_core::Field *const field) { + const my_core::enum_field_types type = field->real_type(); + /* Handle [VAR](CHAR|BINARY) or TEXT|BLOB */ + if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_STRING || + type == MYSQL_TYPE_BLOB) { + + return (RDB_INDEX_COLLATIONS.find(field->charset()->number) != + RDB_INDEX_COLLATIONS.end()) || + rdb_is_collation_supported(field->charset()); + } + return true; +} + + +static bool +rdb_field_uses_nopad_collation(const my_core::Field *const field) { + const my_core::enum_field_types type = field->real_type(); + /* Handle [VAR](CHAR|BINARY) or TEXT|BLOB */ + if (type == MYSQL_TYPE_VARCHAR || type == MYSQL_TYPE_STRING || + type == MYSQL_TYPE_BLOB) { + + /* + This is technically a NOPAD collation but it's a binary collation + that we can handle. 
+ */ + if (RDB_INDEX_COLLATIONS.find(field->charset()->number) != + RDB_INDEX_COLLATIONS.end()) + return false; + + return (field->charset()->state & MY_CS_NOPAD); + } + return false; +} + + +/* + Create structures needed for storing data in rocksdb. This is called when the + table is created. The structures will be shared by all TABLE* objects. + + @param + table_arg Table with definition + db_table "dbname.tablename" + len strlen of the above + tbl_def_arg tbl_def whose key_descr is being created/populated + old_tbl_def_arg tbl_def from which keys are being copied over from + (for use during inplace alter) + + @return + 0 - Ok + other - error, either given table ddl is not supported by rocksdb or OOM. +*/ +int ha_rocksdb::create_key_defs( + const TABLE *const table_arg, Rdb_tbl_def *const tbl_def_arg, + const TABLE *const old_table_arg /* = nullptr */, + const Rdb_tbl_def *const old_tbl_def_arg + /* = nullptr */) const { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(table_arg->s != nullptr); + + /* + These need to be one greater than MAX_INDEXES since the user can create + MAX_INDEXES secondary keys and no primary key which would cause us + to generate a hidden one. + */ + std::array cfs; + + /* + NOTE: All new column families must be created before new index numbers are + allocated to each key definition. See below for more details. + http://github.com/MySQLOnRocksDB/mysql-5.6/issues/86#issuecomment-138515501 + */ + if (create_cfs(table_arg, tbl_def_arg, &cfs)) { + DBUG_RETURN(HA_EXIT_FAILURE); + } + + uint64 ttl_duration = 0; + std::string ttl_column; + uint ttl_field_offset; + + uint err; + if ((err = Rdb_key_def::extract_ttl_duration(table_arg, tbl_def_arg, + &ttl_duration))) { + DBUG_RETURN(err); + } + + if ((err = Rdb_key_def::extract_ttl_col(table_arg, tbl_def_arg, &ttl_column, + &ttl_field_offset))) { + DBUG_RETURN(err); + } + + /* We don't currently support TTL on tables with hidden primary keys. 
*/ + if (ttl_duration > 0 && has_hidden_pk(table_arg)) { + my_error(ER_RDB_TTL_UNSUPPORTED, MYF(0)); + DBUG_RETURN(HA_EXIT_FAILURE); + } + + /* + If TTL duration is not specified but TTL column was specified, throw an + error because TTL column requires duration. + */ + if (ttl_duration == 0 && !ttl_column.empty()) { + my_error(ER_RDB_TTL_COL_FORMAT, MYF(0), ttl_column.c_str()); + DBUG_RETURN(HA_EXIT_FAILURE); + } + + if (!old_tbl_def_arg) { + /* + old_tbl_def doesn't exist. this means we are in the process of creating + a new table. + + Get the index numbers (this will update the next_index_number) + and create Rdb_key_def structures. + */ + for (uint i = 0; i < tbl_def_arg->m_key_count; i++) { + if (create_key_def(table_arg, i, tbl_def_arg, &m_key_descr_arr[i], cfs[i], + ttl_duration, ttl_column)) { + DBUG_RETURN(HA_EXIT_FAILURE); + } + } + } else { + /* + old_tbl_def exists. This means we are creating a new tbl_def as part of + in-place alter table. Copy over existing keys from the old_tbl_def and + generate the necessary new key definitions if any. + */ + if (create_inplace_key_defs(table_arg, tbl_def_arg, old_table_arg, + old_tbl_def_arg, cfs, ttl_duration, + ttl_column)) { + DBUG_RETURN(HA_EXIT_FAILURE); + } + } + + DBUG_RETURN(HA_EXIT_SUCCESS); +} + +/* + Checks index parameters and creates column families needed for storing data + in rocksdb if necessary. + + @param in + table_arg Table with definition + db_table Table name + tbl_def_arg Table def structure being populated + + @param out + cfs CF info for each key definition in 'key_info' order + + @return + 0 - Ok + other - error +*/ +int ha_rocksdb::create_cfs( + const TABLE *const table_arg, Rdb_tbl_def *const tbl_def_arg, + std::array *const cfs) const { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(table_arg->s != nullptr); + + char tablename_sys[NAME_LEN + 1]; + bool tsys_set= false; + + /* + The first loop checks the index parameters and creates + column families if necessary. 
+ */ + for (uint i = 0; i < tbl_def_arg->m_key_count; i++) { + rocksdb::ColumnFamilyHandle *cf_handle; + + if (!is_hidden_pk(i, table_arg, tbl_def_arg) && + tbl_def_arg->base_tablename().find(tmp_file_prefix) != 0) { + if (!tsys_set) + { + tsys_set= true; + my_core::filename_to_tablename(tbl_def_arg->base_tablename().c_str(), + tablename_sys, sizeof(tablename_sys)); + } + + for (uint part = 0; part < table_arg->key_info[i].ext_key_parts; + part++) + { + /* MariaDB: disallow NOPAD collations */ + if (rdb_field_uses_nopad_collation( + table_arg->key_info[i].key_part[part].field)) + { + my_error(ER_MYROCKS_CANT_NOPAD_COLLATION, MYF(0)); + DBUG_RETURN(HA_EXIT_FAILURE); + } + + if (rocksdb_strict_collation_check && + !rdb_is_index_collation_supported( + table_arg->key_info[i].key_part[part].field) && + !rdb_collation_exceptions->matches(tablename_sys)) { + + char buf[1024]; + my_snprintf(buf, sizeof(buf), + "Indexed column %s.%s uses a collation that does not " + "allow index-only access in secondary key and has " + "reduced disk space efficiency in primary key.", + tbl_def_arg->full_tablename().c_str(), + table_arg->key_info[i].key_part[part].field->field_name.str); + + my_error(ER_INTERNAL_ERROR, MYF(ME_WARNING), buf); + } + } + } + + // Internal consistency check to make sure that data in TABLE and + // Rdb_tbl_def structures matches. Either both are missing or both are + // specified. Yes, this is critical enough to make it into SHIP_ASSERT. + SHIP_ASSERT(IF_PARTITIONING(!table_arg->part_info,true) == tbl_def_arg->base_partition().empty()); + + // Generate the name for the column family to use. + bool per_part_match_found = false; + std::string cf_name = + generate_cf_name(i, table_arg, tbl_def_arg, &per_part_match_found); + + // Prevent create from using the system column family. 
+ if (cf_name == DEFAULT_SYSTEM_CF_NAME) { + my_error(ER_WRONG_ARGUMENTS, MYF(0), + "column family not valid for storing index data."); + DBUG_RETURN(HA_EXIT_FAILURE); + } + + // Here's how `get_or_create_cf` will use the input parameters: + // + // `cf_name` - will be used as a CF name. + cf_handle = cf_manager.get_or_create_cf(rdb, cf_name); + + if (!cf_handle) { + DBUG_RETURN(HA_EXIT_FAILURE); + } + + auto &cf = (*cfs)[i]; + + cf.cf_handle = cf_handle; + cf.is_reverse_cf = Rdb_cf_manager::is_cf_name_reverse(cf_name.c_str()); + cf.is_per_partition_cf = per_part_match_found; + } + + DBUG_RETURN(HA_EXIT_SUCCESS); +} + +/* + Create key definition needed for storing data in rocksdb during ADD index + inplace operations. + + @param in + table_arg Table with definition + tbl_def_arg New table def structure being populated + old_tbl_def_arg Old(current) table def structure + cfs Struct array which contains column family information + + @return + 0 - Ok + other - error, either given table ddl is not supported by rocksdb or OOM. +*/ +int ha_rocksdb::create_inplace_key_defs( + const TABLE *const table_arg, Rdb_tbl_def *const tbl_def_arg, + const TABLE *const old_table_arg, const Rdb_tbl_def *const old_tbl_def_arg, + const std::array &cfs, + uint64 ttl_duration, const std::string &ttl_column) const { + DBUG_ENTER_FUNC(); + + std::shared_ptr *const old_key_descr = + old_tbl_def_arg->m_key_descr_arr; + std::shared_ptr *const new_key_descr = + tbl_def_arg->m_key_descr_arr; + const std::unordered_map old_key_pos = + get_old_key_positions(table_arg, tbl_def_arg, old_table_arg, + old_tbl_def_arg); + + uint i; + for (i = 0; i < tbl_def_arg->m_key_count; i++) { + const auto &it = old_key_pos.find(get_key_name(i, table_arg, tbl_def_arg)); + + if (it != old_key_pos.end()) { + /* + Found matching index in old table definition, so copy it over to the + new one created. 
+ */ + const Rdb_key_def &okd = *old_key_descr[it->second]; + + const GL_INDEX_ID gl_index_id = okd.get_gl_index_id(); + struct Rdb_index_info index_info; + if (!dict_manager.get_index_info(gl_index_id, &index_info)) { + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: Could not get index information " + "for Index Number (%u,%u), table %s", + gl_index_id.cf_id, gl_index_id.index_id, + old_tbl_def_arg->full_tablename().c_str()); + DBUG_RETURN(HA_EXIT_FAILURE); + } + + uint32 ttl_rec_offset = + Rdb_key_def::has_index_flag(index_info.m_index_flags, + Rdb_key_def::TTL_FLAG) + ? Rdb_key_def::calculate_index_flag_offset( + index_info.m_index_flags, Rdb_key_def::TTL_FLAG) + : UINT_MAX; + + /* + We can't use the copy constructor because we need to update the + keynr within the pack_info for each field and the keyno of the keydef + itself. + */ + new_key_descr[i] = std::make_shared( + okd.get_index_number(), i, okd.get_cf(), + index_info.m_index_dict_version, index_info.m_index_type, + index_info.m_kv_version, okd.m_is_reverse_cf, + okd.m_is_per_partition_cf, okd.m_name.c_str(), + dict_manager.get_stats(gl_index_id), index_info.m_index_flags, + ttl_rec_offset, index_info.m_ttl_duration); + } else if (create_key_def(table_arg, i, tbl_def_arg, &new_key_descr[i], + cfs[i], ttl_duration, ttl_column)) { + DBUG_RETURN(HA_EXIT_FAILURE); + } + + DBUG_ASSERT(new_key_descr[i] != nullptr); + new_key_descr[i]->setup(table_arg, tbl_def_arg); + } + + DBUG_RETURN(HA_EXIT_SUCCESS); +} + +std::unordered_map ha_rocksdb::get_old_key_positions( + const TABLE *const table_arg, const Rdb_tbl_def *const tbl_def_arg, + const TABLE *const old_table_arg, + const Rdb_tbl_def *const old_tbl_def_arg) const { + DBUG_ENTER_FUNC(); + + std::shared_ptr *const old_key_descr = + old_tbl_def_arg->m_key_descr_arr; + std::unordered_map old_key_pos; + std::unordered_map new_key_pos; + uint i; + + for (i = 0; i < tbl_def_arg->m_key_count; i++) { + new_key_pos[get_key_name(i, table_arg, tbl_def_arg)] = i; + } + + 
for (i = 0; i < old_tbl_def_arg->m_key_count; i++) { + if (is_hidden_pk(i, old_table_arg, old_tbl_def_arg)) { + old_key_pos[old_key_descr[i]->m_name] = i; + continue; + } + + /* + In case of matching key name, need to check key parts of keys as well, + in case a simultaneous drop + add is performed, where the key name is the + same but the key parts are different. + + Example: + CREATE TABLE t1 (a INT, b INT, KEY ka(a)) ENGINE=RocksDB; + ALTER TABLE t1 DROP INDEX ka, ADD INDEX ka(b), ALGORITHM=INPLACE; + */ + const KEY *const old_key = &old_table_arg->key_info[i]; + const auto &it = new_key_pos.find(old_key->name.str); + if (it == new_key_pos.end()) { + continue; + } + + KEY *const new_key = &table_arg->key_info[it->second]; + + /* + Check that the key is identical between old and new tables. + If not, we still need to create a new index. + + The exception is if there is an index changed from unique to non-unique, + in these cases we don't need to rebuild as they are stored the same way in + RocksDB. + */ + bool unique_to_non_unique = + ((old_key->flags ^ new_key->flags) == HA_NOSAME) && + (old_key->flags & HA_NOSAME); + + if (compare_keys(old_key, new_key) && !unique_to_non_unique) { + continue; + } + + /* Check to make sure key parts match. */ + if (compare_key_parts(old_key, new_key)) { + continue; + } + + old_key_pos[old_key->name.str] = i; + } + + DBUG_RETURN(old_key_pos); +} + +/* Check to see if two keys are identical. */ +int ha_rocksdb::compare_keys(const KEY *const old_key, + const KEY *const new_key) const { + DBUG_ENTER_FUNC(); + + /* Check index name. */ + if (strcmp(old_key->name.str, new_key->name.str) != 0) { + DBUG_RETURN(HA_EXIT_FAILURE); + } + + /* If index algorithms are different then keys are different. */ + if (old_key->algorithm != new_key->algorithm) { + DBUG_RETURN(HA_EXIT_FAILURE); + } + + /* Check that the key is identical between old and new tables. 
*/ + if ((old_key->flags ^ new_key->flags) & HA_KEYFLAG_MASK) { + DBUG_RETURN(HA_EXIT_FAILURE); + } + + /* Check index comment. (for column family changes) */ + std::string old_comment(old_key->comment.str, old_key->comment.length); + std::string new_comment(new_key->comment.str, new_key->comment.length); + if (old_comment.compare(new_comment) != 0) { + DBUG_RETURN(HA_EXIT_FAILURE); + } + + DBUG_RETURN(HA_EXIT_SUCCESS); +} + +/* Check two keys to ensure that key parts within keys match */ +int ha_rocksdb::compare_key_parts(const KEY *const old_key, + const KEY *const new_key) const { + DBUG_ENTER_FUNC(); + + /* Skip if key parts do not match, as it is a different key */ + if (new_key->user_defined_key_parts != old_key->user_defined_key_parts) { + DBUG_RETURN(HA_EXIT_FAILURE); + } + + /* Check to see that key parts themselves match */ + for (uint i = 0; i < old_key->user_defined_key_parts; i++) { + if (strcmp(old_key->key_part[i].field->field_name.str, + new_key->key_part[i].field->field_name.str) != 0) { + DBUG_RETURN(HA_EXIT_FAILURE); + } + + /* Check if prefix index key part length has changed */ + if (old_key->key_part[i].length != new_key->key_part[i].length) { + DBUG_RETURN(HA_EXIT_FAILURE); + } + } + + DBUG_RETURN(HA_EXIT_SUCCESS); +} + +/* + Create key definition needed for storing data in rocksdb. + This can be called either during CREATE table or doing ADD index operations. + + @param in + table_arg Table with definition + i Position of index being created inside table_arg->key_info + tbl_def_arg Table def structure being populated + cf_info Struct which contains column family information + + @param out + new_key_def Newly created index definition. + + @return + 0 - Ok + other - error, either given table ddl is not supported by rocksdb or OOM. 
+*/ +int ha_rocksdb::create_key_def(const TABLE *const table_arg, const uint i, + const Rdb_tbl_def *const tbl_def_arg, + std::shared_ptr *const new_key_def, + const struct key_def_cf_info &cf_info, + uint64 ttl_duration, + const std::string &ttl_column) const { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(*new_key_def == nullptr); + + const uint index_id = ddl_manager.get_and_update_next_number(&dict_manager); + const uint16_t index_dict_version = Rdb_key_def::INDEX_INFO_VERSION_LATEST; + uchar index_type; + uint16_t kv_version; + + if (is_hidden_pk(i, table_arg, tbl_def_arg)) { + index_type = Rdb_key_def::INDEX_TYPE_HIDDEN_PRIMARY; + kv_version = Rdb_key_def::PRIMARY_FORMAT_VERSION_LATEST; + } else if (i == table_arg->s->primary_key) { + index_type = Rdb_key_def::INDEX_TYPE_PRIMARY; + uint16 pk_latest_version = Rdb_key_def::PRIMARY_FORMAT_VERSION_LATEST; + kv_version = pk_latest_version; + } else { + index_type = Rdb_key_def::INDEX_TYPE_SECONDARY; + uint16 sk_latest_version = Rdb_key_def::SECONDARY_FORMAT_VERSION_LATEST; + kv_version = sk_latest_version; + } + + // Use PRIMARY_FORMAT_VERSION_UPDATE1 here since it is the same value as + // SECONDARY_FORMAT_VERSION_UPDATE1 so it doesn't matter if this is a + // primary key or secondary key. + DBUG_EXECUTE_IF("MYROCKS_LEGACY_VARBINARY_FORMAT", { + kv_version = Rdb_key_def::PRIMARY_FORMAT_VERSION_UPDATE1; + }); + + DBUG_EXECUTE_IF("MYROCKS_NO_COVERED_BITMAP_FORMAT", { + if (index_type == Rdb_key_def::INDEX_TYPE_SECONDARY) { + kv_version = Rdb_key_def::SECONDARY_FORMAT_VERSION_UPDATE2; + } + }); + + uint32 index_flags = (ttl_duration > 0 ? Rdb_key_def::TTL_FLAG : 0); + + uint32 ttl_rec_offset = + Rdb_key_def::has_index_flag(index_flags, Rdb_key_def::TTL_FLAG) + ? 
Rdb_key_def::calculate_index_flag_offset(index_flags, + Rdb_key_def::TTL_FLAG) + : UINT_MAX; + + const char *const key_name = get_key_name(i, table_arg, m_tbl_def); + *new_key_def = std::make_shared( + index_id, i, cf_info.cf_handle, index_dict_version, index_type, + kv_version, cf_info.is_reverse_cf, cf_info.is_per_partition_cf, key_name, + Rdb_index_stats(), index_flags, ttl_rec_offset, ttl_duration); + + if (!ttl_column.empty()) { + (*new_key_def)->m_ttl_column = ttl_column; + } + // initialize key_def + (*new_key_def)->setup(table_arg, tbl_def_arg); + DBUG_RETURN(HA_EXIT_SUCCESS); +} + +int rdb_normalize_tablename(const std::string &tablename, + std::string *const strbuf) { + if (tablename.size() < 2 || tablename[0] != '.' || + (tablename[1] != FN_LIBCHAR && tablename[1] != FN_LIBCHAR2)) { + DBUG_ASSERT(0); // We were not passed table name? + return HA_ERR_ROCKSDB_INVALID_TABLE; + } + + size_t pos = tablename.find_first_of(FN_LIBCHAR, 2); + if (pos == std::string::npos) { + pos = tablename.find_first_of(FN_LIBCHAR2, 2); + } + + if (pos == std::string::npos) { + DBUG_ASSERT(0); // We were not passed table name? + return HA_ERR_ROCKSDB_INVALID_TABLE; + } + + *strbuf = tablename.substr(2, pos - 2) + "." 
+ tablename.substr(pos + 1); + + return HA_EXIT_SUCCESS; +} + +/* + Check to see if the user's original statement includes foreign key + references +*/ +bool ha_rocksdb::contains_foreign_key(THD *const thd) { + bool success; + const char *str = thd_query_string(thd)->str; + + DBUG_ASSERT(str != nullptr); + + while (*str != '\0') { + // Scan from our current pos looking for 'FOREIGN' + str = rdb_find_in_string(str, "FOREIGN", &success); + if (!success) { + return false; + } + + // Skip past the found "FOREIGN' + str = rdb_check_next_token(&my_charset_bin, str, "FOREIGN", &success); + DBUG_ASSERT(success); + + if (!my_isspace(&my_charset_bin, *str)) { + return false; + } + + // See if the next token is 'KEY' + str = rdb_check_next_token(&my_charset_bin, str, "KEY", &success); + if (!success) { + continue; + } + + // See if the next token is '(' + str = rdb_check_next_token(&my_charset_bin, str, "(", &success); + if (!success) { + // There is an optional index id after 'FOREIGN KEY', skip it + str = rdb_skip_id(&my_charset_bin, str); + + // Now check for '(' again + str = rdb_check_next_token(&my_charset_bin, str, "(", &success); + } + + // If we have found 'FOREIGN KEY [] (' we can be confident we have + // a foreign key clause. + return success; + } + + // We never found a valid foreign key clause + return false; +} + +/** + @brief + splits the normalized table name of .#P# into + the , and components. + + @param dbbuf returns database name/table_schema + @param tablebuf returns tablename + @param partitionbuf returns partition suffix if there is one + @return HA_EXIT_SUCCESS on success, non-zero on failure to split +*/ +int rdb_split_normalized_tablename(const std::string &fullname, + std::string *const db, + std::string *const table, + std::string *const partition) { + DBUG_ASSERT(!fullname.empty()); + +#define RDB_PARTITION_STR "#P#" + + /* Normalize returns dbname.tablename. */ + size_t dotpos = fullname.find('.'); + + /* Invalid table name? 
*/ + if (dotpos == std::string::npos) { + return HA_ERR_ROCKSDB_INVALID_TABLE; + } + + // Table must have a database name associated with it. + DBUG_ASSERT(dotpos > 0); + + if (db != nullptr) { + *db = fullname.substr(0, dotpos); + } + + dotpos++; + + const size_t partpos = + fullname.find(RDB_PARTITION_STR, dotpos, strlen(RDB_PARTITION_STR)); + + if (partpos != std::string::npos) { + DBUG_ASSERT(partpos >= dotpos); + + if (table != nullptr) { + *table = fullname.substr(dotpos, partpos - dotpos); + } + + if (partition != nullptr) { + *partition = fullname.substr(partpos + strlen(RDB_PARTITION_STR)); + } + } else if (table != nullptr) { + *table = fullname.substr(dotpos); + } + + return HA_EXIT_SUCCESS; +} + +/* + Create a table's Rdb_tbl_def and its Rdb_key_defs and store table information + into MyRocks Data Dictionary + The method is called during create table/partition, truncate table/partition + + @param table_name IN table's name formated as + 'dbname.tablename' + @param table_arg IN sql table + @param auto_increment_value IN specified table's auto increment value + + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::create_table(const std::string &table_name, + const TABLE *table_arg, + ulonglong auto_increment_value) { + DBUG_ENTER_FUNC(); + + + + int err; + + const std::unique_ptr wb = dict_manager.begin(); + rocksdb::WriteBatch *const batch = wb.get(); + + /* Create table/key descriptions and put them into the data dictionary */ + m_tbl_def = new Rdb_tbl_def(table_name); + + uint n_keys = table_arg->s->keys; + + /* + If no primary key found, create a hidden PK and place it inside table + definition + */ + if (has_hidden_pk(table_arg)) { + n_keys += 1; + // reset hidden pk id + // the starting valid value for hidden pk is 1 + m_tbl_def->m_hidden_pk_val = 1; + } + + m_key_descr_arr = new std::shared_ptr[n_keys]; + m_tbl_def->m_key_count = n_keys; + m_tbl_def->m_key_descr_arr = m_key_descr_arr; + + err = 
create_key_defs(table_arg, m_tbl_def); + if (err != HA_EXIT_SUCCESS) { + goto error; + } + + m_pk_descr = m_key_descr_arr[pk_index(table_arg, m_tbl_def)]; + + if (auto_increment_value) { + bool autoinc_upgrade_test = false; + m_tbl_def->m_auto_incr_val = auto_increment_value; + DBUG_EXECUTE_IF("myrocks_autoinc_upgrade", autoinc_upgrade_test = true;); + if (!autoinc_upgrade_test) { + auto s = dict_manager.put_auto_incr_val( + batch, m_tbl_def->get_autoincr_gl_index_id(), + m_tbl_def->m_auto_incr_val); + if (!s.ok()) { + goto error; + } + } + } + + dict_manager.lock(); + err = ddl_manager.put_and_write(m_tbl_def, batch); + if (err != HA_EXIT_SUCCESS) { + dict_manager.unlock(); + goto error; + } + + err = dict_manager.commit(batch); + if (err != HA_EXIT_SUCCESS) { + dict_manager.unlock(); + goto error; + } + + dict_manager.unlock(); + + DBUG_RETURN(HA_EXIT_SUCCESS); + +error: + /* Delete what we have allocated so far */ + delete m_tbl_def; + m_tbl_def = nullptr; + m_key_descr_arr = nullptr; + + DBUG_RETURN(err); +} + +/** + @brief + create() is called to create a table. The variable name will have the name + of the table. + + @details + When create() is called you do not need to worry about + opening the table. Also, the .frm file will have already been + created so adjusting create_info is not necessary. You can overwrite + the .frm file at this point if you wish to change the table + definition, but there are no methods currently provided for doing + so. + + Called from handle.cc by ha_create_table(). 
+ + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) + + @see + ha_create_table() in handle.cc +*/ + +int ha_rocksdb::create(const char *const name, TABLE *const table_arg, + HA_CREATE_INFO *const create_info) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(table_arg != nullptr); + DBUG_ASSERT(create_info != nullptr); + + if (create_info->data_file_name) { + // DATA DIRECTORY is used to create tables under a specific location + // outside the MySQL data directory. We don't support this for MyRocks. + // The `rocksdb_datadir` setting should be used to configure RocksDB data + // directory. + DBUG_RETURN(HA_ERR_ROCKSDB_TABLE_DATA_DIRECTORY_NOT_SUPPORTED); + } + + if (create_info->index_file_name) { + // Similar check for INDEX DIRECTORY as well. + DBUG_RETURN(HA_ERR_ROCKSDB_TABLE_INDEX_DIRECTORY_NOT_SUPPORTED); + } + + int err; + /* + Construct dbname.tablename ourselves, because parititioning + passes strings like "./test/t14#P#p0" for individual partitions, + while table_arg->s->table_name has none of that. + */ + std::string str; + err = rdb_normalize_tablename(name, &str); + if (err != HA_EXIT_SUCCESS) { + DBUG_RETURN(err); + } + + // FOREIGN KEY isn't supported yet + THD *const thd = my_core::thd_get_current_thd(); + if (contains_foreign_key(thd)) { + my_error(ER_NOT_SUPPORTED_YET, MYF(0), + "FOREIGN KEY for the RocksDB storage engine"); + DBUG_RETURN(HA_ERR_UNSUPPORTED); + } + + // Check whether Data Dictionary contain information + Rdb_tbl_def *tbl = ddl_manager.find(str); + if (tbl != nullptr) { + if (thd->lex->sql_command == SQLCOM_TRUNCATE) { + err = delete_table(tbl); + if (err != HA_EXIT_SUCCESS) { + DBUG_RETURN(err); + } + } else { + my_error(ER_METADATA_INCONSISTENCY, MYF(0), str.c_str(), name); + DBUG_RETURN(HA_ERR_ROCKSDB_CORRUPT_DATA); + } + } + + // The below adds/clears hooks in RocksDB sync points. 
There's no reason for + // this code to be in ::create() but it needs to be somewhere where it is + // away from any tight loops and where one can invoke it from mtr: + DBUG_EXECUTE_IF("rocksdb_enable_delay_commits", + { + auto syncpoint= rocksdb::SyncPoint::GetInstance(); + syncpoint->SetCallBack("DBImpl::WriteImpl:BeforeLeaderEnters", + [&](void* /*arg*/) {my_sleep(500);} ); + syncpoint->EnableProcessing(); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_WRONG_ARGUMENTS, + "enable_delay_commits_mode ON"); + + }); + DBUG_EXECUTE_IF("rocksdb_disable_delay_commits", + { + auto syncpoint= rocksdb::SyncPoint::GetInstance(); + syncpoint->ClearCallBack("DBImpl::WriteImpl:BeforeLeaderEnters"); + syncpoint->DisableProcessing(); + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_WRONG_ARGUMENTS, + "enable_delay_commits_mode OFF"); + }); + + DBUG_RETURN(create_table(str, table_arg, create_info->auto_increment_value)); +} + +/** + @note + This function is used only when the table has not yet been opened, and + keyread_allowed bitmap doesn't have the correct values yet. + + See comment in ha_rocksdb::index_flags() for details. 
+*/ + +bool ha_rocksdb::check_keyread_allowed(uint inx, uint part, + bool all_parts) const { + bool res = true; + KEY *const key_info = &table_share->key_info[inx]; + + Rdb_field_packing dummy1; + res = dummy1.setup(nullptr, key_info->key_part[part].field, inx, part, + key_info->key_part[part].length); + + if (res && all_parts) { + for (uint i = 0; i < part; i++) { + Field *field; + if ((field = key_info->key_part[i].field)) { + Rdb_field_packing dummy; + if (!dummy.setup(nullptr, field, inx, i, + key_info->key_part[i].length)) { + /* Cannot do index-only reads for this column */ + res = false; + break; + } + } + } + } + + const uint pk = table_share->primary_key; + if (inx == pk && all_parts && + part + 1 == table_share->key_info[pk].user_defined_key_parts) { + m_pk_can_be_decoded = res; + } + + return res; +} + +int ha_rocksdb::read_key_exact(const Rdb_key_def &kd, + rocksdb::Iterator *const iter, + const bool /* unused */, + const rocksdb::Slice &key_slice, + const int64_t ttl_filter_ts) { + THD *thd = ha_thd(); + /* + We are looking for the first record such that + index_tuple= lookup_tuple. + lookup_tuple may be a prefix of the index. + */ + rocksdb_smart_seek(kd.m_is_reverse_cf, iter, key_slice); + + while (iter->Valid() && kd.value_matches_prefix(iter->key(), key_slice)) { + if (thd && thd->killed) { + return HA_ERR_QUERY_INTERRUPTED; + } + /* + If TTL is enabled we need to check if the given key has already expired + from the POV of the current transaction. If it has, try going to the next + key. + */ + if (kd.has_ttl() && should_hide_ttl_rec(kd, iter->value(), ttl_filter_ts)) { + rocksdb_smart_next(kd.m_is_reverse_cf, iter); + continue; + } + + return HA_EXIT_SUCCESS; + } + + /* + Got a record that is not equal to the lookup value, or even a record + from another table.index. 
+ */ + return HA_ERR_KEY_NOT_FOUND; +} + +int ha_rocksdb::read_before_key(const Rdb_key_def &kd, + const bool full_key_match, + const rocksdb::Slice &key_slice, + const int64_t ttl_filter_ts) { + THD *thd = ha_thd(); + /* + We are looking for record with the biggest t.key such that + t.key < lookup_tuple. + */ + rocksdb_smart_seek(!kd.m_is_reverse_cf, m_scan_it, key_slice); + + while (is_valid(m_scan_it)) { + if (thd && thd->killed) { + return HA_ERR_QUERY_INTERRUPTED; + } + /* + We are using full key and we've hit an exact match, or... + + If TTL is enabled we need to check if the given key has already expired + from the POV of the current transaction. If it has, try going to the next + key. + */ + if ((full_key_match && + kd.value_matches_prefix(m_scan_it->key(), key_slice)) || + (kd.has_ttl() && + should_hide_ttl_rec(kd, m_scan_it->value(), ttl_filter_ts))) { + rocksdb_smart_next(!kd.m_is_reverse_cf, m_scan_it); + continue; + } + + return HA_EXIT_SUCCESS; + } + + return HA_ERR_KEY_NOT_FOUND; +} + +int ha_rocksdb::read_after_key(const Rdb_key_def &kd, + const rocksdb::Slice &key_slice, + const int64_t ttl_filter_ts) { + THD *thd = ha_thd(); + /* + We are looking for the first record such that + + index_tuple $GT lookup_tuple + + with HA_READ_AFTER_KEY, $GT = '>', + with HA_READ_KEY_OR_NEXT, $GT = '>=' + */ + rocksdb_smart_seek(kd.m_is_reverse_cf, m_scan_it, key_slice); + + /* + If TTL is enabled we need to check if the given key has already expired + from the POV of the current transaction. If it has, try going to the next + key. + */ + while (is_valid(m_scan_it) && kd.has_ttl() && + should_hide_ttl_rec(kd, m_scan_it->value(), ttl_filter_ts)) { + if (thd && thd->killed) { + return HA_ERR_QUERY_INTERRUPTED; + } + rocksdb_smart_next(kd.m_is_reverse_cf, m_scan_it); + } + + return is_valid(m_scan_it) ? 
HA_EXIT_SUCCESS : HA_ERR_KEY_NOT_FOUND; +} + +int ha_rocksdb::position_to_correct_key( + const Rdb_key_def &kd, const enum ha_rkey_function &find_flag, + const bool full_key_match, const uchar *const key, + const key_part_map &keypart_map, const rocksdb::Slice &key_slice, + bool *const move_forward, const int64_t ttl_filter_ts) { + int rc = 0; + + *move_forward = true; + + switch (find_flag) { + case HA_READ_KEY_EXACT: + rc = read_key_exact(kd, m_scan_it, full_key_match, key_slice, + ttl_filter_ts); + break; + case HA_READ_BEFORE_KEY: + *move_forward = false; + rc = read_before_key(kd, full_key_match, key_slice, ttl_filter_ts); + if (rc == 0 && !kd.covers_key(m_scan_it->key())) { + /* The record we've got is not from this index */ + rc = HA_ERR_KEY_NOT_FOUND; + } + break; + case HA_READ_AFTER_KEY: + case HA_READ_KEY_OR_NEXT: + rc = read_after_key(kd, key_slice, ttl_filter_ts); + if (rc == 0 && !kd.covers_key(m_scan_it->key())) { + /* The record we've got is not from this index */ + rc = HA_ERR_KEY_NOT_FOUND; + } + break; + case HA_READ_KEY_OR_PREV: + case HA_READ_PREFIX: + /* This flag is not used by the SQL layer, so we don't support it yet. */ + rc = HA_ERR_UNSUPPORTED; + break; + case HA_READ_PREFIX_LAST: + case HA_READ_PREFIX_LAST_OR_PREV: + *move_forward = false; + /* + Find the last record with the specified index prefix lookup. + - HA_READ_PREFIX_LAST requires that the record has the + prefix=lookup (if there are no such records, + HA_ERR_KEY_NOT_FOUND should be returned). + - HA_READ_PREFIX_LAST_OR_PREV has no such requirement. If there are no + records with prefix=lookup, we should return the last record + before that. 
+ */ + rc = read_before_key(kd, full_key_match, key_slice, ttl_filter_ts); + if (rc == 0) { + const rocksdb::Slice &rkey = m_scan_it->key(); + if (!kd.covers_key(rkey)) { + /* The record we've got is not from this index */ + rc = HA_ERR_KEY_NOT_FOUND; + } else if (find_flag == HA_READ_PREFIX_LAST) { + uint size = kd.pack_index_tuple(table, m_pack_buffer, + m_sk_packed_tuple, m_record_buffer, + key, keypart_map); + rocksdb::Slice lookup_tuple( + reinterpret_cast(m_sk_packed_tuple), size); + + // We need to compare the key we've got with the original search + // prefix. + if (!kd.value_matches_prefix(rkey, lookup_tuple)) { + rc = HA_ERR_KEY_NOT_FOUND; + } + } + } + break; + default: + DBUG_ASSERT(0); + break; + } + + return rc; +} + +int ha_rocksdb::calc_eq_cond_len(const Rdb_key_def &kd, + const enum ha_rkey_function &find_flag, + const rocksdb::Slice &slice, + const int bytes_changed_by_succ, + const key_range *const end_key, + uint *const end_key_packed_size) { + if (find_flag == HA_READ_KEY_EXACT) return slice.size(); + + if (find_flag == HA_READ_PREFIX_LAST) { + /* + We have made the kd.successor(m_sk_packed_tuple) call above. + + The slice is at least Rdb_key_def::INDEX_NUMBER_SIZE bytes long. + */ + return slice.size() - bytes_changed_by_succ; + } + + if (end_key) { + *end_key_packed_size = + kd.pack_index_tuple(table, m_pack_buffer, m_end_key_packed_tuple, + m_record_buffer, end_key->key, end_key->keypart_map); + + /* + Calculating length of the equal conditions here. 4 byte index id is + included. 
+ Example1: id1 BIGINT, id2 INT, id3 BIGINT, PRIMARY KEY (id1, id2, id3) + WHERE id1=1 AND id2=1 AND id3>=2 => eq_cond_len= 4+8+4= 16 + WHERE id1=1 AND id2>=1 AND id3>=2 => eq_cond_len= 4+8= 12 + Example2: id1 VARCHAR(30), id2 INT, PRIMARY KEY (id1, id2) + WHERE id1 = 'AAA' and id2 < 3; => eq_cond_len=13 (varchar used 9 bytes) + */ + rocksdb::Slice end_slice(reinterpret_cast(m_end_key_packed_tuple), + *end_key_packed_size); + return slice.difference_offset(end_slice); + } + + /* + On range scan without any end key condition, there is no + eq cond, and eq cond length is the same as index_id size (4 bytes). + Example1: id1 BIGINT, id2 INT, id3 BIGINT, PRIMARY KEY (id1, id2, id3) + WHERE id1>=1 AND id2 >= 2 and id2 <= 5 => eq_cond_len= 4 + */ + return Rdb_key_def::INDEX_NUMBER_SIZE; +} + +int ha_rocksdb::read_row_from_primary_key(uchar *const buf) { + int rc; + const rocksdb::Slice &rkey = m_scan_it->key(); + const uint pk_size = rkey.size(); + const char *pk_data = rkey.data(); + + memcpy(m_pk_packed_tuple, pk_data, pk_size); + m_last_rowkey.copy(pk_data, pk_size, &my_charset_bin); + + if (m_lock_rows != RDB_LOCK_NONE) { + /* We need to put a lock and re-read */ + rc = get_row_by_rowid(buf, m_pk_packed_tuple, pk_size); + } else { + /* Unpack from the row we've read */ + const rocksdb::Slice &value = m_scan_it->value(); + rc = convert_record_from_storage_format(&rkey, &value, buf); + } + + return rc; +} + +int ha_rocksdb::read_row_from_secondary_key(uchar *const buf, + const Rdb_key_def &kd, + bool move_forward) { + int rc = 0; + uint pk_size; + + /* Get the key columns and primary key value */ + const rocksdb::Slice &rkey = m_scan_it->key(); + const rocksdb::Slice &value = m_scan_it->value(); + +#ifndef DBUG_OFF + bool save_keyread_only = m_keyread_only; +#endif + DBUG_EXECUTE_IF("dbug.rocksdb.HA_EXTRA_KEYREAD", { m_keyread_only = true; }); + + bool covered_lookup = (m_keyread_only && kd.can_cover_lookup()) || + kd.covers_lookup(&value, &m_lookup_bitmap); + +#ifndef 
DBUG_OFF + m_keyread_only = save_keyread_only; +#endif + + if (covered_lookup && m_lock_rows == RDB_LOCK_NONE) { + pk_size = + kd.get_primary_key_tuple(table, *m_pk_descr, &rkey, m_pk_packed_tuple); + if (pk_size == RDB_INVALID_KEY_LEN) { + rc = HA_ERR_ROCKSDB_CORRUPT_DATA; + } else { + rc = kd.unpack_record(table, buf, &rkey, &value, + m_converter->get_verify_row_debug_checksums()); + global_stats.covered_secondary_key_lookups.inc(); + } + } else { + if (kd.m_is_reverse_cf) move_forward = !move_forward; + + rc = find_icp_matching_index_rec(move_forward, buf); + if (!rc) { + const rocksdb::Slice &rkey = m_scan_it->key(); + pk_size = kd.get_primary_key_tuple(table, *m_pk_descr, &rkey, + m_pk_packed_tuple); + if (pk_size == RDB_INVALID_KEY_LEN) { + rc = HA_ERR_ROCKSDB_CORRUPT_DATA; + } else { + rc = get_row_by_rowid(buf, m_pk_packed_tuple, pk_size); + } + } + } + + if (!rc) { + m_last_rowkey.copy((const char *)m_pk_packed_tuple, pk_size, + &my_charset_bin); + } + + return rc; +} + +/** + @note + The problem with this function is that SQL layer calls it, when + - the table has not been yet opened (no ::open() call done) + - this->table_share already exists, but it is in the process of being + filled, so some of fields are still NULL. + - In particular, table_share->key_info[inx].key_part[] is filled only up + to part #part. Subsequent key parts are not yet filled. + + To complicate things further, SQL layer will call index_flags() with + all_parts=TRUE. Essentially, we're asked to provide flags for reading + keyparts whose datatype is not yet known. + + We walk around this problem by using check_keyread_allowed(), which uses + table_share object and is careful not to step on unitialized data. + + When we get a call with all_parts=TRUE, we try to analyze all parts but + ignore those that have key_part->field==nullptr (these are not initialized + yet). 
+*/ + +ulong ha_rocksdb::index_flags(uint inx, uint part, bool all_parts) const { + DBUG_ENTER_FUNC(); + + ulong base_flags = HA_READ_NEXT | // doesn't seem to be used + HA_READ_ORDER | HA_READ_RANGE | HA_READ_PREV; + + if (check_keyread_allowed(inx, part, all_parts)) { + base_flags |= HA_KEYREAD_ONLY; + } + + if (inx == table_share->primary_key) { + /* + Index-only reads on primary key are the same as table scan for us. Still, + we need to explicitly "allow" them, otherwise SQL layer will miss some + plans. + */ + base_flags |= HA_KEYREAD_ONLY | HA_CLUSTERED_INDEX; + } else { + /* + We can Index Condition Pushdown any key except the primary. With primary + key, we get (pk, record) pair immediately, there is no place to put the + ICP check. + */ + base_flags |= HA_DO_INDEX_COND_PUSHDOWN; + } + + DBUG_RETURN(base_flags); +} + +/** + @brief + Read next index tuple through the secondary index. + + @details + m_scan_it points at the index key-value pair that we should read the (pk,row) + pair for. 
+*/ +int ha_rocksdb::secondary_index_read(const int keyno, uchar *const buf) { + DBUG_ASSERT(table != nullptr); +#ifdef MARIAROCKS_NOT_YET + stats.rows_requested++; +#endif + /* Use STATUS_NOT_FOUND when record not found or some error occurred */ + table->status = STATUS_NOT_FOUND; + + if (is_valid(m_scan_it)) { + rocksdb::Slice key = m_scan_it->key(); + + /* Check if we've ran out of records of this index */ + if (m_key_descr_arr[keyno]->covers_key(key)) { + int rc = 0; + + // TODO: We could here check if we have ran out of range we're scanning + const uint size = m_key_descr_arr[keyno]->get_primary_key_tuple( + table, *m_pk_descr, &key, m_pk_packed_tuple); + if (size == RDB_INVALID_KEY_LEN) { + return HA_ERR_ROCKSDB_CORRUPT_DATA; + } + + m_last_rowkey.copy((const char *)m_pk_packed_tuple, size, + &my_charset_bin); + + rocksdb::Slice value = m_scan_it->value(); + bool covered_lookup = + (m_keyread_only && m_key_descr_arr[keyno]->can_cover_lookup()) || + m_key_descr_arr[keyno]->covers_lookup(&value, &m_lookup_bitmap); + if (covered_lookup && m_lock_rows == RDB_LOCK_NONE) { + rc = m_key_descr_arr[keyno]->unpack_record( + table, buf, &key, &value, + m_converter->get_verify_row_debug_checksums()); + global_stats.covered_secondary_key_lookups.inc(); + } else { + DEBUG_SYNC(ha_thd(), "rocksdb_concurrent_delete_sk"); + rc = get_row_by_rowid(buf, m_pk_packed_tuple, size); + } + + if (!rc) { + table->status = 0; +#ifdef MARIAROCKS_NOT_YET + stats.rows_read++; + stats.rows_index_next++; +#endif + update_row_stats(ROWS_READ); + } + return rc; + } + } + return HA_ERR_END_OF_FILE; +} + +bool ha_rocksdb::is_using_full_key(key_part_map keypart_map, + uint actual_key_parts) +{ + return (keypart_map == HA_WHOLE_KEY) || + (keypart_map == ((key_part_map(1) << actual_key_parts) + - 1)); +} + +/* + ha_rocksdb::read_range_first overrides handler::read_range_first. 
+ The only difference from handler::read_range_first is that + ha_rocksdb::read_range_first passes end_key to + ha_rocksdb::index_read_map_impl function. + + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::read_range_first(const key_range *const start_key, + const key_range *const end_key, + bool eq_range_arg, bool sorted) { + DBUG_ENTER_FUNC(); + + int result; + + eq_range = eq_range_arg; + + /* MariaDB: Pass RANGE_SCAN_ASC when we support ICP on backwards scans */ + set_end_range(end_key); + + range_key_part = table->key_info[active_index].key_part; + + if (!start_key) { + // Read first record + result = ha_index_first(table->record[0]); + } else { +#ifdef MARIAROCKS_NOT_YET + if (is_using_prohibited_gap_locks( + is_using_full_unique_key(active_index, start_key->keypart_map, + start_key->flag))) { + DBUG_RETURN(HA_ERR_LOCK_DEADLOCK); + } +#endif + increment_statistics(&SSV::ha_read_key_count); + + result = + index_read_map_impl(table->record[0], start_key->key, + start_key->keypart_map, start_key->flag, end_key); + } + if (result) { + DBUG_RETURN((result == HA_ERR_KEY_NOT_FOUND) ? HA_ERR_END_OF_FILE : result); + } + + if (compare_key(end_range) <= 0) { + DBUG_RETURN(HA_EXIT_SUCCESS); + } else { + /* + The last read row does not fall in the range. So request + storage engine to release row lock if possible. 
+ */ + unlock_row(); + DBUG_RETURN(HA_ERR_END_OF_FILE); + } +} + +int ha_rocksdb::prepare_index_scan() +{ + range_key_part= table->key_info[active_index].key_part; + set_end_range(NULL); + return 0; +} + + +int ha_rocksdb::prepare_range_scan(const key_range *start_key, + const key_range *end_key) +{ + range_key_part= table->key_info[active_index].key_part; + + if (start_key) + { + m_save_start_range= *start_key; + m_start_range= &m_save_start_range; + } + else + m_start_range= NULL; + + set_end_range(end_key); + return 0; +} + + +/** + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ + +int ha_rocksdb::index_read_map(uchar *const buf, const uchar *const key, + key_part_map keypart_map, + enum ha_rkey_function find_flag) { + DBUG_ENTER_FUNC(); + + DBUG_RETURN(index_read_map_impl(buf, key, keypart_map, find_flag, nullptr)); +} + +/* + See storage/rocksdb/rocksdb-range-access.txt for description of how MySQL + index navigation commands are converted into RocksDB lookup commands. + + This function takes end_key as an argument, and it is set on range scan. + MyRocks needs to decide whether prefix bloom filter can be used or not. + To decide to use prefix bloom filter or not, calculating equal condition + length + is needed. On equal lookups (find_flag == HA_READ_KEY_EXACT), equal + condition length is the same as rocksdb::Slice.size() of the start key. + On range scan, equal condition length is MIN(start_key, end_key) of the + rocksdb::Slice expression. 
+ + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::index_read_map_impl(uchar *const buf, const uchar *const key, + key_part_map keypart_map, + enum ha_rkey_function find_flag, + const key_range *end_key) { + DBUG_ENTER_FUNC(); + + DBUG_EXECUTE_IF("myrocks_busy_loop_on_row_read", int debug_i = 0; + while (1) { debug_i++; }); + + int rc = 0; + + THD *thd = ha_thd(); + DEBUG_SYNC(thd, "rocksdb.check_flags_rmi"); + if (thd && thd->killed) { + rc = HA_ERR_QUERY_INTERRUPTED; + DBUG_RETURN(rc); + } + + const Rdb_key_def &kd = *m_key_descr_arr[active_index]; + const uint actual_key_parts = kd.get_key_parts(); + bool using_full_key = is_using_full_key(keypart_map, actual_key_parts); + + if (!end_key) end_key = end_range; + + /* By default, we don't need the retrieved records to match the prefix */ + m_sk_match_prefix = nullptr; +#ifdef MARIAROCKS_NOT_YET + stats.rows_requested++; +#endif + if (active_index == table->s->primary_key && find_flag == HA_READ_KEY_EXACT && + using_full_key) { + /* + Equality lookup over primary key, using full tuple. + This is a special case, use DB::Get. 
+ */ + const uint size = kd.pack_index_tuple(table, m_pack_buffer, + m_pk_packed_tuple, m_record_buffer, + key, keypart_map); + bool skip_lookup = is_blind_delete_enabled(); + + rc = get_row_by_rowid(buf, m_pk_packed_tuple, size, skip_lookup, false); + + if (!rc && !skip_lookup) { +#ifdef MARIAROCKS_NOT_YET + stats.rows_read++; + stats.rows_index_first++; +#endif + update_row_stats(ROWS_READ); + } + DBUG_RETURN(rc); + } + + /* + Unique secondary index performs lookups without the extended key fields + */ + uint packed_size; + if (active_index != table->s->primary_key && + table->key_info[active_index].flags & HA_NOSAME && + find_flag == HA_READ_KEY_EXACT && using_full_key) { + key_part_map tmp_map = (key_part_map(1) << table->key_info[active_index] + .user_defined_key_parts) - + 1; + packed_size = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple, + m_record_buffer, key, tmp_map); + if (table->key_info[active_index].user_defined_key_parts != + kd.get_key_parts()) { + using_full_key = false; + } + } else { + packed_size = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple, + m_record_buffer, key, keypart_map); + } + + if ((pushed_idx_cond && pushed_idx_cond_keyno == active_index) && + (find_flag == HA_READ_KEY_EXACT || find_flag == HA_READ_PREFIX_LAST)) { + /* + We are doing a point index lookup, and ICP is enabled. It is possible + that this call will be followed by ha_rocksdb->index_next_same() call. + + Do what InnoDB does: save the lookup tuple now. We will need it in + index_next_same/find_icp_matching_index_rec in order to stop scanning + as soon as index record doesn't match the lookup tuple. + + When not using ICP, handler::index_next_same() will make sure that rows + that don't match the lookup prefix are not returned. + row matches the lookup prefix. 
+ */ + m_sk_match_prefix = m_sk_match_prefix_buf; + m_sk_match_length = packed_size; + memcpy(m_sk_match_prefix, m_sk_packed_tuple, packed_size); + } + + int bytes_changed_by_succ = 0; + if (find_flag == HA_READ_PREFIX_LAST_OR_PREV || + find_flag == HA_READ_PREFIX_LAST || find_flag == HA_READ_AFTER_KEY) { + /* See below */ + bytes_changed_by_succ = kd.successor(m_sk_packed_tuple, packed_size); + } + + rocksdb::Slice slice(reinterpret_cast(m_sk_packed_tuple), + packed_size); + + uint end_key_packed_size = 0; + /* + In MariaDB, the end_key is always the bigger end of the range. + If we are doing a reverse-ordered scan (that is, walking from the bigger + key values to smaller), we should use the smaller end of range as end_key. + */ + const key_range *cur_end_key= end_key; + if (find_flag == HA_READ_PREFIX_LAST_OR_PREV || + find_flag == HA_READ_BEFORE_KEY) + { + cur_end_key= m_start_range; + } + + const uint eq_cond_len = + calc_eq_cond_len(kd, find_flag, slice, bytes_changed_by_succ, cur_end_key, + &end_key_packed_size); + + bool use_all_keys = false; + if (find_flag == HA_READ_KEY_EXACT && + my_count_bits(keypart_map) == kd.get_key_parts()) { + use_all_keys = true; + } + + Rdb_transaction *const tx = get_or_create_tx(table->in_use); + const bool is_new_snapshot = !tx->has_snapshot(); + // Loop as long as we get a deadlock error AND we end up creating the + // snapshot here (i.e. it did not exist prior to this) + for (;;) { + DEBUG_SYNC(thd, "rocksdb.check_flags_rmi_scan"); + if (thd && thd->killed) { + rc = HA_ERR_QUERY_INTERRUPTED; + break; + } + /* + This will open the iterator and position it at a record that's equal or + greater than the lookup tuple. 
+ */ + setup_scan_iterator(kd, &slice, use_all_keys, eq_cond_len); + + /* + Once we are positioned on from above, move to the position we really + want: See storage/rocksdb/rocksdb-range-access.txt + */ + bool move_forward; + rc = + position_to_correct_key(kd, find_flag, using_full_key, key, keypart_map, + slice, &move_forward, tx->m_snapshot_timestamp); + + if (rc) { + break; + } + + m_skip_scan_it_next_call = false; + + /* + Now get the data for the row into 'buf'. If we were using a primary key + then we have all the rows we need. For a secondary key we now need to + lookup the primary key. + */ + if (active_index == table->s->primary_key) { + rc = read_row_from_primary_key(buf); + } else { + rc = read_row_from_secondary_key(buf, kd, move_forward); + } + + if (!should_recreate_snapshot(rc, is_new_snapshot)) { + break; /* Exit the loop */ + } + + // release the snapshot and iterator so they will be regenerated + tx->release_snapshot(); + release_scan_iterator(); + } + + if (rc) { + /* + This status is returned on any error + the only possible error condition is record-not-found + */ + table->status = STATUS_NOT_FOUND; + } else { + table->status = 0; +#ifdef MARIAROCKS_NOT_YET + stats.rows_read++; + stats.rows_index_first++; +#endif + update_row_stats(ROWS_READ); + } + + DBUG_RETURN(rc); +} + +/* + @brief + Scan the secondary index until we find an index record that satisfies ICP + + @param move_forward TRUE <=> move m_scan_it forward + FALSE <=> move m_scan_it backward + @param buf Record buffer (must be the same buffer that + pushed index condition points to, in practice + it is table->record[0]) + + @detail + Move the current iterator m_scan_it until we get an index tuple that + satisfies the pushed Index Condition. + (if there is no pushed index condition, return right away) + + @return + 0 - Index tuple satisfies ICP, can do index read. 
+ other - error code +*/ + +int ha_rocksdb::find_icp_matching_index_rec(const bool move_forward, + uchar *const buf) { + if (pushed_idx_cond && pushed_idx_cond_keyno == active_index) { + const Rdb_key_def &kd = *m_key_descr_arr[active_index]; + THD *thd = ha_thd(); + + while (1) { + int rc = rocksdb_skip_expired_records(kd, m_scan_it, !move_forward); + if (rc != HA_EXIT_SUCCESS) { + return rc; + } + + if (thd && thd->killed) { + return HA_ERR_QUERY_INTERRUPTED; + } + + if (!is_valid(m_scan_it)) { + table->status = STATUS_NOT_FOUND; + return HA_ERR_END_OF_FILE; + } + const rocksdb::Slice rkey = m_scan_it->key(); + + if (!kd.covers_key(rkey)) { + table->status = STATUS_NOT_FOUND; + return HA_ERR_END_OF_FILE; + } + + if (m_sk_match_prefix) { + const rocksdb::Slice prefix((const char *)m_sk_match_prefix, + m_sk_match_length); + if (!kd.value_matches_prefix(rkey, prefix)) { + table->status = STATUS_NOT_FOUND; + return HA_ERR_END_OF_FILE; + } + } + + const rocksdb::Slice value = m_scan_it->value(); + int err = kd.unpack_record(table, buf, &rkey, &value, + m_converter->get_verify_row_debug_checksums()); + if (err != HA_EXIT_SUCCESS) { + return err; + } + + const check_result_t icp_status= handler_index_cond_check(this); + if (icp_status == CHECK_NEG) { + rocksdb_smart_next(!move_forward, m_scan_it); + continue; /* Get the next (or prev) index tuple */ + } + else if (icp_status == CHECK_OUT_OF_RANGE || + icp_status == CHECK_ABORTED_BY_USER) { + /* We have walked out of range we are scanning */ + table->status = STATUS_NOT_FOUND; + return HA_ERR_END_OF_FILE; + } + else /* icp_status == CHECK_POS */ + { + /* Index Condition is satisfied. We have rc==0, proceed to fetch the + * row. 
*/ + break; + } + } + } + return HA_EXIT_SUCCESS; +} + +/** + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ + +/** + @return + HA_ADMIN_OK OK + other HA_ADMIN error code +*/ +int ha_rocksdb::check(THD *const thd, HA_CHECK_OPT *const check_opt) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(thd != nullptr); + DBUG_ASSERT(check_opt != nullptr); + + const uint pk = pk_index(table, m_tbl_def); + String rowkey_copy; + String sec_key_copy; + const char *const table_name = table->s->table_name.str; + + // Only when debugging: don't use snapshot when reading + // Rdb_transaction *tx= get_or_create_tx(table->in_use); + // tx->snapshot= nullptr; + + bool save_verify_row_debug_checksums = + m_converter->get_verify_row_debug_checksums(); + m_converter->set_verify_row_debug_checksums(true); + /* For each secondary index, check that we can get a PK value from it */ + // NO_LINT_DEBUG + sql_print_verbose_info("CHECKTABLE %s: Checking table %s", table_name, + table_name); + ha_rows UNINIT_VAR(row_checksums_at_start); // set/used iff first_index==true + ha_rows row_checksums = ha_rows(-1); + bool first_index = true; + + for (uint keyno = 0; keyno < table->s->keys; keyno++) { + if (keyno != pk) { + extra(HA_EXTRA_KEYREAD); + ha_index_init(keyno, true); + ha_rows rows = 0; + ha_rows checksums = 0; + if (first_index) { + row_checksums_at_start = m_converter->get_row_checksums_checked(); + } + int res; + // NO_LINT_DEBUG + sql_print_verbose_info("CHECKTABLE %s: Checking index %s", table_name, + table->key_info[keyno].name.str); + while (1) { + if (!rows) { + res = index_first(table->record[0]); + } else { + res = index_next(table->record[0]); + } + + if (res == HA_ERR_END_OF_FILE) break; + if (res) { + // error + // NO_LINT_DEBUG + sql_print_error("CHECKTABLE %s: .. 
row %lld: index scan error %d", + table_name, rows, res); + goto error; + } + rocksdb::Slice key = m_scan_it->key(); + sec_key_copy.copy(key.data(), key.size(), &my_charset_bin); + rowkey_copy.copy(m_last_rowkey.ptr(), m_last_rowkey.length(), + &my_charset_bin); + + if (m_key_descr_arr[keyno]->unpack_info_has_checksum( + m_scan_it->value())) { + checksums++; + } + + if ((res = get_row_by_rowid(table->record[0], rowkey_copy.ptr(), + rowkey_copy.length()))) { + // NO_LINT_DEBUG + sql_print_error( + "CHECKTABLE %s: .. row %lld: " + "failed to fetch row by rowid", + table_name, rows); + goto error; + } + + longlong hidden_pk_id = 0; + if (has_hidden_pk(table) && + read_hidden_pk_id_from_rowkey(&hidden_pk_id)) { + goto error; + } + + /* Check if we get the same PK value */ + uint packed_size = m_pk_descr->pack_record( + table, m_pack_buffer, table->record[0], m_pk_packed_tuple, nullptr, + false, hidden_pk_id); + if (packed_size != rowkey_copy.length() || + memcmp(m_pk_packed_tuple, rowkey_copy.ptr(), packed_size)) { + // NO_LINT_DEBUG + sql_print_error("CHECKTABLE %s: .. row %lld: PK value mismatch", + table_name, rows); + goto print_and_error; + } + + /* Check if we get the same secondary key value */ + packed_size = m_key_descr_arr[keyno]->pack_record( + table, m_pack_buffer, table->record[0], m_sk_packed_tuple, + &m_sk_tails, false, hidden_pk_id); + if (packed_size != sec_key_copy.length() || + memcmp(m_sk_packed_tuple, sec_key_copy.ptr(), packed_size)) { + // NO_LINT_DEBUG + sql_print_error( + "CHECKTABLE %s: .. 
row %lld: " + "secondary index value mismatch", + table_name, rows); + goto print_and_error; + } + rows++; + continue; + + print_and_error : { + std::string buf; + buf = rdb_hexdump(rowkey_copy.ptr(), rowkey_copy.length(), + RDB_MAX_HEXDUMP_LEN); + // NO_LINT_DEBUG + sql_print_error("CHECKTABLE %s: rowkey: %s", table_name, buf.c_str()); + + buf = rdb_hexdump(m_retrieved_record.data(), m_retrieved_record.size(), + RDB_MAX_HEXDUMP_LEN); + // NO_LINT_DEBUG + sql_print_error("CHECKTABLE %s: record: %s", table_name, buf.c_str()); + + buf = rdb_hexdump(sec_key_copy.ptr(), sec_key_copy.length(), + RDB_MAX_HEXDUMP_LEN); + // NO_LINT_DEBUG + sql_print_error("CHECKTABLE %s: index: %s", table_name, buf.c_str()); + + goto error; + } + } + // NO_LINT_DEBUG + sql_print_verbose_info("CHECKTABLE %s: ... %lld index entries checked " + "(%lld had checksums)", + table_name, rows, checksums); + + if (first_index) { + row_checksums = + m_converter->get_row_checksums_checked() - row_checksums_at_start; + first_index = false; + } + ha_index_end(); + } + } + if (row_checksums != ha_rows(-1)) { + // NO_LINT_DEBUG + sql_print_verbose_info("CHECKTABLE %s: %lld table records had checksums", + table_name, row_checksums); + } + extra(HA_EXTRA_NO_KEYREAD); + + m_converter->set_verify_row_debug_checksums(save_verify_row_debug_checksums); + /* + TODO(sergiy): we should check also for PK records that are missing in + the secondary indexes. + For that, need to walk through the PK and check that every PK record has a + proper counterpart in each secondary index. 
+ */ + DBUG_RETURN(HA_ADMIN_OK); +error: + m_converter->set_verify_row_debug_checksums(save_verify_row_debug_checksums); + ha_index_or_rnd_end(); + extra(HA_EXTRA_NO_KEYREAD); + + DBUG_RETURN(HA_ADMIN_CORRUPT); +} + +static void dbug_dump_str(FILE *const out, const char *const str, int len) { + fprintf(out, "\""); + for (int i = 0; i < len; i++) { + if (str[i] > 32) { + fprintf(out, "%c", str[i]); + } else { + fprintf(out, "\\%d", str[i]); + } + } + fprintf(out, "\""); +} + +/* + Debugging help: dump the whole database into a human-readable file. + Usage: + dbug_dump_database(rdb); +*/ + +void dbug_dump_database(rocksdb::DB *const db) { + FILE *const out = fopen("/tmp/rocksdb.dump", "wt"); + if (!out) return; + + rocksdb::Iterator *it = db->NewIterator(rocksdb::ReadOptions()); + for (it->SeekToFirst(); it->Valid(); it->Next()) { + rocksdb::Slice key = it->key(); + rocksdb::Slice val = it->value(); + dbug_dump_str(out, key.data(), key.size()); + fprintf(out, " -> "); + dbug_dump_str(out, val.data(), val.size()); + fprintf(out, "\n"); + } + + delete it; + fclose(out); +} + +rocksdb::Status ha_rocksdb::get_for_update( + Rdb_transaction *const tx, rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, rocksdb::PinnableSlice *const value) const { + DBUG_ASSERT(m_lock_rows != RDB_LOCK_NONE); + const bool exclusive = m_lock_rows != RDB_LOCK_READ; + + const bool do_validate = + my_core::thd_tx_isolation(ha_thd()) > ISO_READ_COMMITTED; + rocksdb::Status s = + tx->get_for_update(column_family, key, value, exclusive, do_validate); + +#ifndef DBUG_OFF + ++rocksdb_num_get_for_update_calls; +#endif + return s; +} + +bool ha_rocksdb::is_blind_delete_enabled() { + THD *thd = ha_thd(); + /* + Note: in MariaDB, thd->lex->table_count is only set for multi-table DELETE, + not for single-table DELETE. So we check thd->lex->query_tables instead. 
+ */ + return (THDVAR(thd, blind_delete_primary_key) && + thd->lex->sql_command == SQLCOM_DELETE && + thd->lex->query_tables && !thd->lex->query_tables->next_global && + table->s->keys == 1 && + !has_hidden_pk(table) && !thd->rgi_slave); +} + +/* + Given a rowid (i.e. packed PK) as a parameter, get the record. + + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ + +int ha_rocksdb::get_row_by_rowid(uchar *const buf, const char *const rowid, + const uint rowid_size, const bool skip_lookup, + const bool skip_ttl_check) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(table != nullptr); + + int rc; + + rocksdb::Slice key_slice(rowid, rowid_size); + + Rdb_transaction *const tx = get_or_create_tx(table->in_use); + DBUG_ASSERT(tx != nullptr); + +#ifdef ENABLED_DEBUG_SYNC + DEBUG_SYNC(ha_thd(), "rocksdb.get_row_by_rowid"); + DBUG_EXECUTE_IF("dbug.rocksdb.get_row_by_rowid", { + THD *thd = ha_thd(); + const char act[] = + "now signal Reached " + "wait_for signal.rocksdb.get_row_by_rowid_let_running"; + DBUG_ASSERT(opt_debug_sync_timeout > 0); + DBUG_ASSERT(!debug_sync_set_action(thd, STRING_WITH_LEN(act))); + };); +#endif /* ENABLED_DEBUG_SYNC */ + + bool found; + rocksdb::Status s; + + /* Pretend row found without looking up */ + if (skip_lookup) { +#ifdef MARIAROCKS_NOT_YET + stats.rows_deleted_blind++; +#endif + update_row_stats(ROWS_DELETED_BLIND); + m_last_rowkey.copy((const char *)rowid, rowid_size, &my_charset_bin); + table->status = 0; + DBUG_RETURN(0); + } + + if (m_lock_rows == RDB_LOCK_NONE) { + tx->acquire_snapshot(true); + s = tx->get(m_pk_descr->get_cf(), key_slice, &m_retrieved_record); + } else if (m_insert_with_update && m_dup_pk_found) { + DBUG_ASSERT(m_pk_descr->get_keyno() == m_dupp_errkey); + DBUG_ASSERT(m_dup_pk_retrieved_record.length() == + m_retrieved_record.size()); + DBUG_ASSERT(memcmp(m_dup_pk_retrieved_record.ptr(), + m_retrieved_record.data(), + m_retrieved_record.size()) == 0); + + // do nothing - we already have the result 
in m_retrieved_record and + // already taken the lock + s = rocksdb::Status::OK(); + } else { + s = get_for_update(tx, m_pk_descr->get_cf(), key_slice, + &m_retrieved_record); + } + + DBUG_EXECUTE_IF("rocksdb_return_status_corrupted", + dbug_change_status_to_corrupted(&s);); + + if (!s.IsNotFound() && !s.ok()) { + DBUG_RETURN(tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def, + m_table_handler)); + } + found = !s.IsNotFound(); + + table->status = STATUS_NOT_FOUND; + if (found) { + /* If we found the record, but it's expired, pretend we didn't find it. */ + if (!skip_ttl_check && m_pk_descr->has_ttl() && + should_hide_ttl_rec(*m_pk_descr, m_retrieved_record, + tx->m_snapshot_timestamp)) { + DBUG_RETURN(HA_ERR_KEY_NOT_FOUND); + } + + m_last_rowkey.copy((const char *)rowid, rowid_size, &my_charset_bin); + rc = convert_record_from_storage_format(&key_slice, buf); + + if (!rc) { + table->status = 0; + } + } else { + /* + Note: we don't need to unlock the row. It is intentional that we keep + locks on rows that don't exist. 
+ */ + rc = HA_ERR_KEY_NOT_FOUND; + } + + DBUG_RETURN(rc); +} + +/** + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::index_next(uchar *const buf) { + DBUG_ENTER_FUNC(); + + bool moves_forward = true; + if (m_key_descr_arr[active_index]->m_is_reverse_cf) { + moves_forward = false; + } + + int rc = index_next_with_direction(buf, moves_forward); + if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE; + + DBUG_RETURN(rc); +} + +/** + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::index_prev(uchar *const buf) { + DBUG_ENTER_FUNC(); + + bool moves_forward = false; + if (m_key_descr_arr[active_index]->m_is_reverse_cf) { + moves_forward = true; + } + + int rc = index_next_with_direction(buf, moves_forward); + if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE; + + DBUG_RETURN(rc); +} + +int ha_rocksdb::index_next_with_direction(uchar *const buf, bool move_forward) { + DBUG_ENTER_FUNC(); + + int rc; + + if (active_index == pk_index(table, m_tbl_def)) { + rc = rnd_next_with_direction(buf, move_forward); + } else { + THD *thd = ha_thd(); + for (;;) { + DEBUG_SYNC(thd, "rocksdb.check_flags_inwd"); + if (thd && thd->killed) { + rc = HA_ERR_QUERY_INTERRUPTED; + break; + } + if (m_skip_scan_it_next_call) { + m_skip_scan_it_next_call = false; + } else { + if (move_forward) { + m_scan_it->Next(); /* this call cannot fail */ + } else { + m_scan_it->Prev(); + } + } + rc = rocksdb_skip_expired_records(*m_key_descr_arr[active_index], + m_scan_it, !move_forward); + if (rc != HA_EXIT_SUCCESS) { + break; + } + rc = find_icp_matching_index_rec(move_forward, buf); + if (!rc) rc = secondary_index_read(active_index, buf); + if (!should_skip_invalidated_record(rc)) { + break; + } + } + } + + DBUG_RETURN(rc); +} + +/** + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::index_first(uchar *const buf) { + DBUG_ENTER_FUNC(); + + 
m_sk_match_prefix = nullptr; + int rc = m_key_descr_arr[active_index]->m_is_reverse_cf + ? index_last_intern(buf) + : index_first_intern(buf); + if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE; + + DBUG_RETURN(rc); +} + +/** + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::index_last(uchar *const buf) { + DBUG_ENTER_FUNC(); + + m_sk_match_prefix = nullptr; + int rc = m_key_descr_arr[active_index]->m_is_reverse_cf + ? index_first_intern(buf) + : index_last_intern(buf); + if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE; + + DBUG_RETURN(rc); +} + +/* + Start scanning from the "first" value. + + The 'first' here means "the first from start of the key space". + For reverse-ordered key spaces, the first value will be the biggest, 'zzzz'. + + An picture of a forward-ordered keyspace (remember, the keys have form + 'indexnr-keyval'. Suppose the index we are at has number n) + + (n-1) - ... + ( n ) <--- 1. (n) doesn't exist in the db but it would be here. + ( n ) - aaa <--- 2. Seek("n") will put us here on the first index + ( n ) - bbb record. + ( n ) - cc + + So, need to do: Seek(n); + + A backward-ordered keyspace: + + (n+1) - bbb + (n+1) - aaa + (n+1) <--- (n+1) doesn't exist in the db but would be here. + ( n ) - ccc <--- 1. We need to be here. 
+ ( n ) - bbb + ( n ) - aaa + ( n ) + + So, need to: Seek(n+1); + +*/ + +int ha_rocksdb::index_first_intern(uchar *const buf) { + DBUG_ENTER_FUNC(); + + uchar *key; + uint key_size; + int rc; + + if (is_pk(active_index, table, m_tbl_def)) { + key = m_pk_packed_tuple; + } else { + key = m_sk_packed_tuple; + } + + DBUG_ASSERT(key != nullptr); + + const Rdb_key_def &kd = *m_key_descr_arr[active_index]; + int key_start_matching_bytes = kd.get_first_key(key, &key_size); + + rocksdb::Slice index_key((const char *)key, key_size); + + Rdb_transaction *const tx = get_or_create_tx(table->in_use); + DBUG_ASSERT(tx != nullptr); + + const bool is_new_snapshot = !tx->has_snapshot(); + // Loop as long as we get a deadlock error AND we end up creating the + // snapshot here (i.e. it did not exist prior to this) + for (;;) { + setup_scan_iterator(kd, &index_key, false, key_start_matching_bytes); + m_scan_it->Seek(index_key); + m_skip_scan_it_next_call = true; + + rc = index_next_with_direction(buf, true); + if (!should_recreate_snapshot(rc, is_new_snapshot)) { + break; /* exit the loop */ + } + + // release the snapshot and iterator so they will be regenerated + tx->release_snapshot(); + release_scan_iterator(); + } + + if (!rc) { + /* + index_next is always incremented on success, so decrement if it is + index_first instead + */ +#ifdef MARIAROCKS_NOT_YET + stats.rows_index_first++; + stats.rows_index_next--; +#endif + } + + DBUG_RETURN(rc); +} + +/** + @details + Start scanning from the "last" value + + The 'last' here means "the last from start of the key space". + For reverse-ordered key spaces, we will actually read the smallest value. + + An picture of a forward-ordered keyspace (remember, the keys have form + 'indexnr-keyval'. Suppose the we are at a key that has number n) + + (n-1)-something + ( n )-aaa + ( n )-bbb + ( n )-ccc <----------- Need to seek to here. + (n+1) <---- Doesn't exist, but would be here. 
+ (n+1)-smth, or no value at all + + RocksDB's Iterator::SeekForPrev($val) seeks to "at $val or last value that's + smaller". We can't seek to "(n)-ccc" directly, because we don't know what + is the value of 'ccc' (the biggest record with prefix (n)). Instead, we seek + to "(n+1)", which is the least possible value that's greater than any value + in index #n. + + So, need to: it->SeekForPrev(n+1) + + A backward-ordered keyspace: + + (n+1)-something + ( n ) - ccc + ( n ) - bbb + ( n ) - aaa <---------------- (*) Need to seek here. + ( n ) <--- Doesn't exist, but would be here. + (n-1)-smth, or no value at all + + So, need to: it->SeekForPrev(n) +*/ + +int ha_rocksdb::index_last_intern(uchar *const buf) { + DBUG_ENTER_FUNC(); + + uchar *key; + uint key_size; + int rc; + + if (is_pk(active_index, table, m_tbl_def)) { + key = m_pk_packed_tuple; + } else { + key = m_sk_packed_tuple; + } + + DBUG_ASSERT(key != nullptr); + + const Rdb_key_def &kd = *m_key_descr_arr[active_index]; + int key_end_matching_bytes = kd.get_last_key(key, &key_size); + + rocksdb::Slice index_key((const char *)key, key_size); + + Rdb_transaction *const tx = get_or_create_tx(table->in_use); + DBUG_ASSERT(tx != nullptr); + + bool is_new_snapshot = !tx->has_snapshot(); + // Loop as long as we get a deadlock error AND we end up creating the + // snapshot here (i.e. 
it did not exist prior to this) + for (;;) { + setup_scan_iterator(kd, &index_key, false, key_end_matching_bytes); + m_scan_it->SeekForPrev(index_key); + m_skip_scan_it_next_call = false; + + if (is_pk(active_index, table, m_tbl_def)) { + m_skip_scan_it_next_call = true; + rc = rnd_next_with_direction(buf, false); + } else { + rc = find_icp_matching_index_rec(false /*move_forward*/, buf); + if (!rc) rc = secondary_index_read(active_index, buf); + } + + if (!should_recreate_snapshot(rc, is_new_snapshot)) { + break; /* exit the loop */ + } + + // release the snapshot and iterator so they will be regenerated + tx->release_snapshot(); + release_scan_iterator(); + } + + if (!rc) { + /* + index_next is always incremented on success, so decrement if it is + index_first instead + */ +#ifdef MARIAROCKS_NOT_YET + stats.rows_index_first++; + stats.rows_index_next--; +#endif + } + + DBUG_RETURN(rc); +} + +void ha_rocksdb::unlock_row() { + DBUG_ENTER_FUNC(); + + if (m_lock_rows != RDB_LOCK_NONE) { + Rdb_transaction *const tx = get_or_create_tx(table->in_use); + tx->release_lock(m_pk_descr->get_cf(), + std::string(m_last_rowkey.ptr(), m_last_rowkey.length())); + } + + DBUG_VOID_RETURN; +} + +/* + Returning true if SingleDelete can be used. + - Secondary Indexes can always use SingleDelete. + - If the index is PRIMARY KEY, and if all of the columns of the table + are covered by the PRIMARY KEY, SingleDelete can be used. 
+*/ +bool ha_rocksdb::can_use_single_delete(const uint index) const { + return (index != pk_index(table, m_tbl_def) || + (!has_hidden_pk(table) && + table->key_info[index].ext_key_parts == table->s->fields)); +} + +bool ha_rocksdb::skip_unique_check() const { + /* + We want to skip unique checks if: + 1) bulk_load is on + 2) this table is in the whitelist of tables to skip and the replication + lag has reached a large enough value (see unique_check_lag_threshold + and unique_check_lag_reset_threshold) + 3) the user set unique_checks option to 0, and the table does not have + any secondary indexes (m_key_count == 1). If the table has secondary + keys, then those might become inconsistent/corrupted + 4) We're using read-free replication (only compiled in when + MARIAROCKS_NOT_YET is defined; otherwise this term is FALSE) + */ + return THDVAR(table->in_use, bulk_load) || + (m_force_skip_unique_check && m_skip_unique_check) || + (my_core::thd_test_options(table->in_use, + OPTION_RELAXED_UNIQUE_CHECKS) && + m_tbl_def->m_key_count == 1) || +#ifdef MARIAROCKS_NOT_YET + use_read_free_rpl(); +#else + FALSE; +#endif +} + +#ifdef MARIAROCKS_NOT_YET // MDEV-10975 +void ha_rocksdb::set_force_skip_unique_check(bool skip) { + DBUG_ENTER_FUNC(); + + m_force_skip_unique_check = skip; + + DBUG_VOID_RETURN; +} +#endif + +/* + Returns true if either the bulk_load or the commit_in_the_middle THDVAR + is set for table->in_use. +*/ +bool ha_rocksdb::commit_in_the_middle() { + return THDVAR(table->in_use, bulk_load) || + THDVAR(table->in_use, commit_in_the_middle); +} + +/* + Execute a bulk commit if one is due, i.e. commit_in_the_middle() holds and + the transaction's write count has reached the bulk_load_size THDVAR. + @retval true if bulk commit failed + @retval false if bulk commit was skipped or succeeded +*/ +bool ha_rocksdb::do_bulk_commit(Rdb_transaction *const tx) { + return commit_in_the_middle() && + tx->get_write_count() >= THDVAR(table->in_use, bulk_load_size) && + tx->flush_batch(); +} + +/* + If table was created without primary key, SQL layer represents the primary + key number as MAX_INDEXES. Hence, this function returns true if the table + does not contain a primary key. (In which case we generate a hidden + 'auto-incremented' pk.)
+*/ +bool ha_rocksdb::has_hidden_pk(const TABLE *const table) const { + return Rdb_key_def::table_has_hidden_pk(table); +} + +/* + Returns true if given index number is a hidden_pk. + - This is used when a table is created with no primary key. +*/ +bool ha_rocksdb::is_hidden_pk(const uint index, const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg) { + DBUG_ASSERT(table_arg->s != nullptr); + + return (table_arg->s->primary_key == MAX_INDEXES && + index == tbl_def_arg->m_key_count - 1); +} + +/* Returns index of primary key */ +uint ha_rocksdb::pk_index(const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg) { + DBUG_ASSERT(table_arg->s != nullptr); + + return table_arg->s->primary_key == MAX_INDEXES ? tbl_def_arg->m_key_count - 1 + : table_arg->s->primary_key; +} + +/* Returns true if given index number is a primary key */ +bool ha_rocksdb::is_pk(const uint index, const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg) { + DBUG_ASSERT(table_arg->s != nullptr); + + return index == table_arg->s->primary_key || + is_hidden_pk(index, table_arg, tbl_def_arg); +} + +uint ha_rocksdb::max_supported_key_part_length() const { + DBUG_ENTER_FUNC(); + DBUG_RETURN(rocksdb_large_prefix ? 
MAX_INDEX_COL_LEN_LARGE + : MAX_INDEX_COL_LEN_SMALL); +} + +/* Returns the name of the given index, or HIDDEN_PK_NAME for a hidden PK. */ +const char *ha_rocksdb::get_key_name(const uint index, + const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg) { + if (is_hidden_pk(index, table_arg, tbl_def_arg)) { + return HIDDEN_PK_NAME; + } + + DBUG_ASSERT(table_arg->key_info != nullptr); + DBUG_ASSERT(table_arg->key_info[index].name.str != nullptr); + + return table_arg->key_info[index].name.str; +} + +/* Returns the comment of the given index, or nullptr for a hidden PK. */ +const char *ha_rocksdb::get_key_comment(const uint index, + const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg) { + if (is_hidden_pk(index, table_arg, tbl_def_arg)) { + return nullptr; + } + + DBUG_ASSERT(table_arg->key_info != nullptr); + + return table_arg->key_info[index].comment.str; +} + +const std::string ha_rocksdb::generate_cf_name( + const uint index, const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg, bool *per_part_match_found) { + DBUG_ASSERT(table_arg != nullptr); + DBUG_ASSERT(tbl_def_arg != nullptr); + DBUG_ASSERT(per_part_match_found != nullptr); + + // When creating CF-s the caller needs to know if there was a custom CF name + // specified for a given partition. + *per_part_match_found = false; + + // Index comment is used to define the column family name specification(s). + // If there was no comment, we get an empty string, and it means "use the + // default column family". + const char *const comment = get_key_comment(index, table_arg, tbl_def_arg); + + // `get_key_comment` can return `nullptr` (e.g. for a hidden PK), so map + // that to an empty string before building the std::string. + std::string key_comment = comment ? comment : ""; + + std::string cf_name = Rdb_key_def::parse_comment_for_qualifier( + key_comment, table_arg, tbl_def_arg, per_part_match_found, + RDB_CF_NAME_QUALIFIER); + + if (IF_PARTITIONING(table_arg->part_info,nullptr) != nullptr && !*per_part_match_found) { + // At this point we tried to search for a custom CF name for a partition, + // but none was specified. Therefore default one will be used.
+ return ""; + } + + // If we didn't find any partitioned/non-partitioned qualifiers, return the + // comment itself. NOTE: this currently handles returning the cf name + // specified in the index comment in the case of no partitions, which doesn't + // use any qualifiers at the moment. (aka its a special case) + if (cf_name.empty() && !key_comment.empty()) { + return key_comment; + } + + return cf_name; +} + +const std::string ha_rocksdb::get_table_comment(const TABLE *const table_arg) { + DBUG_ASSERT(table_arg->s != nullptr); + + return table_arg->s->comment.str; +} + +/** + Write a new row + + @param[in] buf new row data to write + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::write_row(const uchar *const buf) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(buf != nullptr); + DBUG_ASSERT(buf == table->record[0]); + DBUG_ASSERT(m_lock_rows == RDB_LOCK_WRITE); + + /* + Note: "buf == table->record[0]" is copied from innodb. I am not aware of + any use cases where this condition is not true. + */ + if (table->next_number_field && buf == table->record[0]) { + int err; + if ((err = update_auto_increment())) { + DBUG_RETURN(err); + } + } + + // clear cache at beginning of write for INSERT ON DUPLICATE + // we may get multiple write->fail->read->update if there are multiple + // values from INSERT + m_dup_pk_found = false; + + const int rv = update_write_row(nullptr, buf, skip_unique_check()); + + if (rv == 0) { +#ifdef MARIAROCKS_NOT_YET + stats.rows_inserted++; +#endif + update_row_stats(ROWS_INSERTED); + } + + DBUG_RETURN(rv); +} + +/** + Constructing m_last_rowkey (MyRocks key expression) from + before_update|delete image (MySQL row expression). + m_last_rowkey is normally set during lookup phase, such as + rnd_next_with_direction() and rnd_pos(). With Read Free Replication, + these read functions are skipped and update_rows(), delete_rows() are + called without setting m_last_rowkey. 
This function sets m_last_rowkey + for Read Free Replication. +*/ +void ha_rocksdb::set_last_rowkey(const uchar *const old_data) { +#ifdef MARIAROCKS_NOT_YET + if (old_data && use_read_free_rpl()) { + const int old_pk_size = m_pk_descr->pack_record( + table, m_pack_buffer, old_data, m_pk_packed_tuple, nullptr, false); + m_last_rowkey.copy((const char *)m_pk_packed_tuple, old_pk_size, + &my_charset_bin); + } +#endif +} + +/** + Collect update data for primary key + + @param[in, out] row_info hold all data for update row, such as + new row data/old row data + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::get_pk_for_update(struct update_row_info *const row_info) { + int size; + + /* + Get new row key for any insert, and any update where the pk is not hidden. + Row key for updates with hidden pk is handled below. + */ + if (!has_hidden_pk(table)) { + row_info->hidden_pk_id = 0; + + row_info->new_pk_unpack_info = &m_pk_unpack_info; + + size = m_pk_descr->pack_record( + table, m_pack_buffer, row_info->new_data, m_pk_packed_tuple, + row_info->new_pk_unpack_info, false, 0, 0, nullptr); + } else if (row_info->old_data == nullptr) { + row_info->hidden_pk_id = update_hidden_pk_val(); + size = + m_pk_descr->pack_hidden_pk(row_info->hidden_pk_id, m_pk_packed_tuple); + } else { + /* + If hidden primary key, rowkey for new record will always be the same as + before + */ + size = row_info->old_pk_slice.size(); + memcpy(m_pk_packed_tuple, row_info->old_pk_slice.data(), size); + int err = read_hidden_pk_id_from_rowkey(&row_info->hidden_pk_id); + if (err) { + return err; + } + } + + row_info->new_pk_slice = + rocksdb::Slice((const char *)m_pk_packed_tuple, size); + + return HA_EXIT_SUCCESS; +} + +/** + Check the specified primary key value is unique and also lock the row + + @param[in] key_id key index + @param[in] row_info hold all data for update row, such as old row + data and new row data + @param[out] found whether the primary 
key exists before. + @param[out] pk_changed whether primary key is changed + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::check_and_lock_unique_pk(const uint key_id, + const struct update_row_info &row_info, + bool *const found) { + DBUG_ASSERT(found != nullptr); + + DBUG_ASSERT(row_info.old_pk_slice.size() == 0 || + row_info.new_pk_slice.compare(row_info.old_pk_slice) != 0); + + /* Ignore PK violations if this is a optimized 'replace into' */ +#ifdef MARIAROCKS_NOT_YET + const bool ignore_pk_unique_check = ha_thd()->lex->blind_replace_into; +#else + const bool ignore_pk_unique_check= false; +#endif + + /* + Perform a read to determine if a duplicate entry exists. For primary + keys, a point lookup will be sufficient. + + note: we intentionally don't set options.snapshot here. We want to read + the latest committed data. + */ + + /* + To prevent race conditions like below, it is necessary to + take a lock for a target row. get_for_update() holds a gap lock if + target key does not exist, so below conditions should never + happen. + + 1) T1 Get(empty) -> T2 Get(empty) -> T1 Put(insert) -> T1 commit + -> T2 Put(overwrite) -> T2 commit + 2) T1 Get(empty) -> T1 Put(insert, not committed yet) -> T2 Get(empty) + -> T2 Put(insert, blocked) -> T1 commit -> T2 commit(overwrite) + */ + const rocksdb::Status s = + get_for_update(row_info.tx, m_pk_descr->get_cf(), row_info.new_pk_slice, + ignore_pk_unique_check ? nullptr : &m_retrieved_record); + if (!s.ok() && !s.IsNotFound()) { + return row_info.tx->set_status_error( + table->in_use, s, *m_key_descr_arr[key_id], m_tbl_def, m_table_handler); + } + + bool key_found = ignore_pk_unique_check ? false : !s.IsNotFound(); + + /* + If the pk key has ttl, we may need to pretend the row wasn't + found if it is already expired. + */ + if (key_found && m_pk_descr->has_ttl() && + should_hide_ttl_rec(*m_pk_descr, m_retrieved_record, + (row_info.tx->m_snapshot_timestamp + ? 
row_info.tx->m_snapshot_timestamp + : static_cast(std::time(nullptr))))) { + key_found = false; + } + + if (key_found && row_info.old_data == nullptr && m_insert_with_update) { + // In INSERT ON DUPLICATE KEY UPDATE ... case, if the insert failed + // due to a duplicate key, remember the last key and skip the check + // next time + m_dup_pk_found = true; + +#ifndef DBUG_OFF + // save it for sanity checking later + m_dup_pk_retrieved_record.copy(m_retrieved_record.data(), + m_retrieved_record.size(), &my_charset_bin); +#endif + } + + *found = key_found; + + return HA_EXIT_SUCCESS; +} + +/** + Check the specified secondary key value is unique and also lock the row + + @param[in] key_id key index + @param[in] row_info hold all data for update row, such as old row + data and new row data + @param[out] found whether specified key value exists before. + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::check_and_lock_sk(const uint key_id, + const struct update_row_info &row_info, + bool *const found) { + DBUG_ASSERT(found != nullptr); + *found = false; + + /* + Can skip checking this key if none of the key fields have changed. + */ + if (row_info.old_data != nullptr && !m_update_scope.is_set(key_id)) { + return HA_EXIT_SUCCESS; + } + + KEY *key_info = nullptr; + uint n_null_fields = 0; + uint user_defined_key_parts = 1; + + key_info = &table->key_info[key_id]; + user_defined_key_parts = key_info->user_defined_key_parts; + /* + If there are no uniqueness requirements, there's no need to obtain a + lock for this key. + */ + if (!(key_info->flags & HA_NOSAME)) { + return HA_EXIT_SUCCESS; + } + + const Rdb_key_def &kd = *m_key_descr_arr[key_id]; + + /* + Calculate the new key for obtaining the lock + + For unique secondary indexes, the key used for locking does not + include the extended fields. 
+ */ + int size = + kd.pack_record(table, m_pack_buffer, row_info.new_data, m_sk_packed_tuple, + nullptr, false, 0, user_defined_key_parts, &n_null_fields); + if (n_null_fields > 0) { + /* + If any fields are marked as NULL this will never match another row as + to NULL never matches anything else including another NULL. + */ + return HA_EXIT_SUCCESS; + } + + const rocksdb::Slice new_slice = + rocksdb::Slice((const char *)m_sk_packed_tuple, size); + + /* + Acquire lock on the old key in case of UPDATE + */ + if (row_info.old_data != nullptr) { + size = kd.pack_record(table, m_pack_buffer, row_info.old_data, + m_sk_packed_tuple_old, nullptr, false, 0, + user_defined_key_parts); + const rocksdb::Slice old_slice = + rocksdb::Slice((const char *)m_sk_packed_tuple_old, size); + + const rocksdb::Status s = + get_for_update(row_info.tx, kd.get_cf(), old_slice, nullptr); + if (!s.ok()) { + return row_info.tx->set_status_error(table->in_use, s, kd, m_tbl_def, + m_table_handler); + } + + /* + If the old and new keys are the same we're done since we've already taken + the lock on the old key + */ + if (!new_slice.compare(old_slice)) { + return HA_EXIT_SUCCESS; + } + } + + /* + Perform a read to determine if a duplicate entry exists - since this is + a secondary indexes a range scan is needed. + + note: we intentionally don't set options.snapshot here. We want to read + the latest committed data. + */ + + const bool all_parts_used = (user_defined_key_parts == kd.get_key_parts()); + + /* + This iterator seems expensive since we need to allocate and free + memory for each unique index. + + If this needs to be optimized, for keys without NULL fields, the + extended primary key fields can be migrated to the value portion of the + key. This enables using Get() instead of Seek() as in the primary key + case. + + The bloom filter may need to be disabled for this lookup. 
+ */ + uchar lower_bound_buf[Rdb_key_def::INDEX_NUMBER_SIZE]; + uchar upper_bound_buf[Rdb_key_def::INDEX_NUMBER_SIZE]; + rocksdb::Slice lower_bound_slice; + rocksdb::Slice upper_bound_slice; + + const bool total_order_seek = !check_bloom_and_set_bounds( + ha_thd(), kd, new_slice, all_parts_used, Rdb_key_def::INDEX_NUMBER_SIZE, + lower_bound_buf, upper_bound_buf, &lower_bound_slice, &upper_bound_slice); + const bool fill_cache = !THDVAR(ha_thd(), skip_fill_cache); + + const rocksdb::Status s = + get_for_update(row_info.tx, kd.get_cf(), new_slice, nullptr); + if (!s.ok() && !s.IsNotFound()) { + return row_info.tx->set_status_error(table->in_use, s, kd, m_tbl_def, + m_table_handler); + } + + rocksdb::Iterator *const iter = row_info.tx->get_iterator( + kd.get_cf(), total_order_seek, fill_cache, lower_bound_slice, + upper_bound_slice, true /* read current data */, + false /* acquire snapshot */); + /* + Need to scan the transaction to see if there is a duplicate key. + Also need to scan RocksDB and verify the key has not been deleted + in the transaction. + */ + iter->Seek(new_slice); + *found = !read_key_exact(kd, iter, all_parts_used, new_slice, + row_info.tx->m_snapshot_timestamp); + delete iter; + + return HA_EXIT_SUCCESS; +} + +/** + Enumerate all keys to check their uniquess and also lock it + + @param[in] row_info hold all data for update row, such as old row + data and new row data + @param[out] pk_changed whether primary key is changed + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::check_uniqueness_and_lock( + const struct update_row_info &row_info, bool pk_changed) { + /* + Go through each index and determine if the index has uniqueness + requirements. If it does, then try to obtain a row lock on the new values. + Once all locks have been obtained, then perform the changes needed to + update/insert the row. 
+ */ + for (uint key_id = 0; key_id < m_tbl_def->m_key_count; key_id++) { + bool found; + int rc; + + if (is_pk(key_id, table, m_tbl_def)) { + if (row_info.old_pk_slice.size() > 0 && !pk_changed) { + found = false; + rc = HA_EXIT_SUCCESS; + } else { + rc = check_and_lock_unique_pk(key_id, row_info, &found); + } + } else { + rc = check_and_lock_sk(key_id, row_info, &found); + } + + if (rc != HA_EXIT_SUCCESS) { + return rc; + } + + if (found) { + /* There is a row with this key already, so error out. */ + errkey = key_id; + m_dupp_errkey = errkey; + + return HA_ERR_FOUND_DUPP_KEY; + } + } + + return HA_EXIT_SUCCESS; +} + +/** + Check whether secondary key value is duplicate or not + + @param[in] table_arg the table currently working on + @param[in key_def the key_def is being checked + @param[in] key secondary key storage data + @param[out] sk_info hold secondary key memcmp datas(new/old) + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ + +int ha_rocksdb::check_duplicate_sk(const TABLE *table_arg, + const Rdb_key_def &key_def, + const rocksdb::Slice *key, + struct unique_sk_buf_info *sk_info) { + uint n_null_fields = 0; + const rocksdb::Comparator *index_comp = key_def.get_cf()->GetComparator(); + + /* Get proper SK buffer. 
*/ + uchar *sk_buf = sk_info->swap_and_get_sk_buf(); + + /* Get memcmp form of sk without extended pk tail */ + uint sk_memcmp_size = + key_def.get_memcmp_sk_parts(table_arg, *key, sk_buf, &n_null_fields); + + sk_info->sk_memcmp_key = + rocksdb::Slice(reinterpret_cast(sk_buf), sk_memcmp_size); + + if (sk_info->sk_memcmp_key_old.size() > 0 && n_null_fields == 0 && + index_comp->Compare(sk_info->sk_memcmp_key, sk_info->sk_memcmp_key_old) == + 0) { + return 1; + } + + sk_info->sk_memcmp_key_old = sk_info->sk_memcmp_key; + return 0; +} + +int ha_rocksdb::bulk_load_key(Rdb_transaction *const tx, const Rdb_key_def &kd, + const rocksdb::Slice &key, + const rocksdb::Slice &value, bool sort) { + DBUG_ENTER_FUNC(); + int res; + THD *thd = ha_thd(); + if (thd && thd->killed) { + DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED); + } + + rocksdb::ColumnFamilyHandle *cf = kd.get_cf(); + + // In the case of unsorted inserts, m_sst_info allocated here is not + // used to store the keys. It is still used to indicate when tables + // are switched. 
+ if (m_sst_info == nullptr || m_sst_info->is_done()) { + m_sst_info.reset(new Rdb_sst_info(rdb, m_table_handler->m_table_name, + kd.get_name(), cf, *rocksdb_db_options, + THDVAR(ha_thd(), trace_sst_api))); + res = tx->start_bulk_load(this, m_sst_info); + if (res != HA_EXIT_SUCCESS) { + DBUG_RETURN(res); + } + } + DBUG_ASSERT(m_sst_info); + + if (sort) { + Rdb_index_merge *key_merge; + DBUG_ASSERT(cf != nullptr); + + res = tx->get_key_merge(kd.get_gl_index_id(), cf, &key_merge); + if (res == HA_EXIT_SUCCESS) { + res = key_merge->add(key, value); + } + } else { + res = m_sst_info->put(key, value); + } + + DBUG_RETURN(res); +} + +int ha_rocksdb::finalize_bulk_load(bool print_client_error) { + DBUG_ENTER_FUNC(); + + int res = HA_EXIT_SUCCESS; + + /* Skip if there are no possible ongoing bulk loads */ + if (m_sst_info) { + if (m_sst_info->is_done()) { + m_sst_info.reset(); + DBUG_RETURN(res); + } + + Rdb_sst_info::Rdb_sst_commit_info commit_info; + + // Wrap up the current work in m_sst_info and get ready to commit + // This transfer the responsibility of commit over to commit_info + res = m_sst_info->finish(&commit_info, print_client_error); + if (res == 0) { + // Make sure we have work to do - under race condition we could lose + // to another thread and end up with no work + if (commit_info.has_work()) { + rocksdb::IngestExternalFileOptions opts; + opts.move_files = true; + opts.snapshot_consistency = false; + opts.allow_global_seqno = false; + opts.allow_blocking_flush = false; + + const rocksdb::Status s = rdb->IngestExternalFile( + commit_info.get_cf(), commit_info.get_committed_files(), opts); + if (!s.ok()) { + if (print_client_error) { + Rdb_sst_info::report_error_msg(s, nullptr); + } + res = HA_ERR_ROCKSDB_BULK_LOAD; + } else { + // Mark the list of SST files as committed, otherwise they'll get + // cleaned up when commit_info destructs + commit_info.commit(); + } + } + } + m_sst_info.reset(); + } + DBUG_RETURN(res); +} + +/** + Update an existing primary key 
record or write a new primary key record + + @param[in] kd the primary key is being update/write + @param[in] update_row_info hold all row data, such as old row data and + new row data + @param[in] pk_changed whether primary key is changed + @return + HA_EXIT_SUCCESS OK + Other HA_ERR error code (can be SE-specific) + */ +int ha_rocksdb::update_write_pk(const Rdb_key_def &kd, + const struct update_row_info &row_info, + bool pk_changed) { + uint key_id = kd.get_keyno(); + bool hidden_pk = is_hidden_pk(key_id, table, m_tbl_def); + ulonglong bytes_written = 0; + + /* + If the PK has changed, or if this PK uses single deletes and this is an + update, the old key needs to be deleted. In the single delete case, it + might be possible to have this sequence of keys: PUT(X), PUT(X), SD(X), + resulting in the first PUT(X) showing up. + */ + if (!hidden_pk && (pk_changed || ((row_info.old_pk_slice.size() > 0) && + can_use_single_delete(key_id)))) { + const rocksdb::Status s = delete_or_singledelete( + key_id, row_info.tx, kd.get_cf(), row_info.old_pk_slice); + if (!s.ok()) { + return row_info.tx->set_status_error(table->in_use, s, kd, m_tbl_def, + m_table_handler); + } else { + bytes_written = row_info.old_pk_slice.size(); + } + } + + if (table->found_next_number_field) { + update_auto_incr_val_from_field(); + } + + int rc = HA_EXIT_SUCCESS; + rocksdb::Slice value_slice; + /* Prepare the new record to be written into RocksDB */ + if ((rc = m_converter->encode_value_slice( + m_pk_descr, row_info.new_pk_slice, row_info.new_pk_unpack_info, + !row_info.old_pk_slice.empty(), should_store_row_debug_checksums(), + m_ttl_bytes, &m_ttl_bytes_updated, &value_slice))) { + return rc; + } + + const auto cf = m_pk_descr->get_cf(); + if (rocksdb_enable_bulk_load_api && THDVAR(table->in_use, bulk_load) && + !hidden_pk) { + /* + Write the primary key directly to an SST file using an SstFileWriter + */ + rc = bulk_load_key(row_info.tx, kd, row_info.new_pk_slice, value_slice, + 
THDVAR(table->in_use, bulk_load_allow_unsorted)); + } else if (row_info.skip_unique_check || row_info.tx->m_ddl_transaction) { + /* + It is responsibility of the user to make sure that the data being + inserted doesn't violate any unique keys. + */ + row_info.tx->get_indexed_write_batch()->Put(cf, row_info.new_pk_slice, + value_slice); + } else { + const bool assume_tracked = can_assume_tracked(ha_thd()); + const auto s = row_info.tx->put(cf, row_info.new_pk_slice, value_slice, + assume_tracked); + if (!s.ok()) { + if (s.IsBusy()) { + errkey = table->s->primary_key; + m_dupp_errkey = errkey; + rc = HA_ERR_FOUND_DUPP_KEY; + } else { + rc = row_info.tx->set_status_error(table->in_use, s, *m_pk_descr, + m_tbl_def, m_table_handler); + } + } + } + + if (rc == HA_EXIT_SUCCESS) { + row_info.tx->update_bytes_written( + bytes_written + row_info.new_pk_slice.size() + value_slice.size()); + } + return rc; +} + +/** + update an existing secondary key record or write a new secondary key record + + @param[in] table_arg Table we're working on + @param[in] kd The secondary key being update/write + @param[in] row_info data structure contains old row data and new row data + @param[in] bulk_load_sk whether support bulk load. Currently it is only + support for write + @return + HA_EXIT_SUCCESS OK + Other HA_ERR error code (can be SE-specific) + */ +int ha_rocksdb::update_write_sk(const TABLE *const table_arg, + const Rdb_key_def &kd, + const struct update_row_info &row_info, + const bool bulk_load_sk) { + int new_packed_size; + int old_packed_size; + int rc = HA_EXIT_SUCCESS; + + rocksdb::Slice new_key_slice; + rocksdb::Slice new_value_slice; + rocksdb::Slice old_key_slice; + + const uint key_id = kd.get_keyno(); + + ulonglong bytes_written = 0; + + /* + Can skip updating this key if none of the key fields have changed and, if + this table has TTL, the TTL timestamp has not changed. 
+ */ + if (row_info.old_data != nullptr && !m_update_scope.is_set(key_id) && + (!kd.has_ttl() || !m_ttl_bytes_updated)) { + return HA_EXIT_SUCCESS; + } + + bool store_row_debug_checksums = should_store_row_debug_checksums(); + new_packed_size = + kd.pack_record(table_arg, m_pack_buffer, row_info.new_data, + m_sk_packed_tuple, &m_sk_tails, store_row_debug_checksums, + row_info.hidden_pk_id, 0, nullptr, m_ttl_bytes); + + if (row_info.old_data != nullptr) { + // The old value + old_packed_size = kd.pack_record( + table_arg, m_pack_buffer, row_info.old_data, m_sk_packed_tuple_old, + &m_sk_tails_old, store_row_debug_checksums, row_info.hidden_pk_id, 0, + nullptr, m_ttl_bytes); + + /* + Check if we are going to write the same value. This can happen when + one does + UPDATE tbl SET col='foo' + and we are looking at the row that already has col='foo'. + + We also need to compare the unpack info. Suppose, the collation is + case-insensitive, and unpack info contains information about whether + the letters were uppercase and lowercase. Then, both 'foo' and 'FOO' + will have the same key value, but different data in unpack_info. + + (note: anyone changing bytewise_compare should take this code into + account) + */ + if (old_packed_size == new_packed_size && + m_sk_tails_old.get_current_pos() == m_sk_tails.get_current_pos() && + !(kd.has_ttl() && m_ttl_bytes_updated) && + memcmp(m_sk_packed_tuple_old, m_sk_packed_tuple, old_packed_size) == + 0 && + memcmp(m_sk_tails_old.ptr(), m_sk_tails.ptr(), + m_sk_tails.get_current_pos()) == 0) { + return HA_EXIT_SUCCESS; + } + + /* + Deleting entries from secondary index should skip locking, but + be visible to the transaction. 
+ (also note that DDL statements do not delete rows, so this is not a DDL + statement) + */ + old_key_slice = rocksdb::Slice( + reinterpret_cast(m_sk_packed_tuple_old), old_packed_size); + + row_info.tx->get_indexed_write_batch()->SingleDelete(kd.get_cf(), + old_key_slice); + + bytes_written = old_key_slice.size(); + } + + new_key_slice = rocksdb::Slice( + reinterpret_cast(m_sk_packed_tuple), new_packed_size); + new_value_slice = + rocksdb::Slice(reinterpret_cast(m_sk_tails.ptr()), + m_sk_tails.get_current_pos()); + + if (bulk_load_sk && row_info.old_data == nullptr) { + rc = bulk_load_key(row_info.tx, kd, new_key_slice, new_value_slice, true); + } else { + row_info.tx->get_indexed_write_batch()->Put(kd.get_cf(), new_key_slice, + new_value_slice); + } + + row_info.tx->update_bytes_written(bytes_written + new_key_slice.size() + + new_value_slice.size()); + + return rc; +} + +/** + Update existing indexes(PK/SKs) or write new indexes(PK/SKs) + + @param[in] row_info hold all row data, such as old key/new key + @param[in] pk_changed whether primary key is changed + @return + HA_EXIT_SUCCESS OK + Other HA_ERR error code (can be SE-specific) + */ +int ha_rocksdb::update_write_indexes(const struct update_row_info &row_info, + const bool pk_changed) { + int rc; + bool bulk_load_sk; + + // The PK must be updated first to pull out the TTL value. + rc = update_write_pk(*m_pk_descr, row_info, pk_changed); + if (rc != HA_EXIT_SUCCESS) { + return rc; + } + + // Update the remaining indexes. 
Allow bulk loading only if + // allow_sk is enabled + bulk_load_sk = rocksdb_enable_bulk_load_api && + THDVAR(table->in_use, bulk_load) && + THDVAR(table->in_use, bulk_load_allow_sk); + for (uint key_id = 0; key_id < m_tbl_def->m_key_count; key_id++) { + if (is_pk(key_id, table, m_tbl_def)) { + continue; + } + + rc = update_write_sk(table, *m_key_descr_arr[key_id], row_info, + bulk_load_sk); + if (rc != HA_EXIT_SUCCESS) { + return rc; + } + } + + return HA_EXIT_SUCCESS; +} + +/** + Update an existing row or write a new row + + @param[in] old_data nullptr for write, non-null for update + @param[in] new_data non-null for write/update + @param[in] skip_unique_check whether to check uniqueness + @return + HA_EXIT_SUCCESS OK + Other HA_ERR error code (can be SE-specific) + */ +int ha_rocksdb::update_write_row(const uchar *const old_data, + const uchar *const new_data, + const bool skip_unique_check) { + DBUG_ENTER_FUNC(); + + THD *thd = ha_thd(); + if (thd && thd->killed) { + DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED); + } + + bool pk_changed = false; + struct update_row_info row_info; + + row_info.old_data = old_data; + row_info.new_data = new_data; + row_info.skip_unique_check = skip_unique_check; + row_info.new_pk_unpack_info = nullptr; + set_last_rowkey(old_data); + + row_info.tx = get_or_create_tx(table->in_use); + + if (old_data != nullptr) { + row_info.old_pk_slice = + rocksdb::Slice(m_last_rowkey.ptr(), m_last_rowkey.length()); + + /* Determine which indexes need updating. */ + calc_updated_indexes(); + } + + /* + Get the new row key into row_info.new_pk_slice + */ + int rc = get_pk_for_update(&row_info); + if (rc != HA_EXIT_SUCCESS) { + DBUG_RETURN(rc); + } + + /* + For UPDATEs, if the key has changed, we need to obtain a lock. INSERTs + always require locking. 
+ */ + if (row_info.old_pk_slice.size() > 0) { + pk_changed = row_info.new_pk_slice.compare(row_info.old_pk_slice) != 0; + } + + if (!skip_unique_check) { + /* + Check to see if we are going to have failures because of unique + keys. Also lock the appropriate key values. + */ + rc = check_uniqueness_and_lock(row_info, pk_changed); + if (rc != HA_EXIT_SUCCESS) { + DBUG_RETURN(rc); + } + } + + DEBUG_SYNC(ha_thd(), "rocksdb.update_write_row_after_unique_check"); + + /* + At this point, all locks have been obtained, and all checks for duplicate + keys have been performed. No further errors can be allowed to occur from + here because updates to the transaction will be made and those updates + cannot be easily removed without rolling back the entire transaction. + */ + rc = update_write_indexes(row_info, pk_changed); + if (rc != HA_EXIT_SUCCESS) { + DBUG_RETURN(rc); + } + + if (old_data != nullptr) { + row_info.tx->incr_update_count(); + } else { + row_info.tx->incr_insert_count(); + } + + row_info.tx->log_table_write_op(m_tbl_def); + + if (do_bulk_commit(row_info.tx)) { + DBUG_RETURN(HA_ERR_ROCKSDB_BULK_LOAD); + } + + DBUG_RETURN(HA_EXIT_SUCCESS); +} + +/* + Setting iterator upper/lower bounds for Seek/SeekForPrev. + This makes RocksDB to avoid scanning tombstones outside of + the given key ranges, when prefix_same_as_start=true was not passed + (when prefix bloom filter can not be used). + Inversing upper/lower bound is necessary on reverse order CF. + This covers HA_READ_PREFIX_LAST* case as well. For example, + if given query eq condition was 12 bytes and condition was + 0x0000b3eb003f65c5e78858b8, and if doing HA_READ_PREFIX_LAST, + eq_cond_len was 11 (see calc_eq_cond_len() for details). + If the index was reverse order, upper bound would be + 0x0000b3eb003f65c5e78857, and lower bound would be + 0x0000b3eb003f65c5e78859. These cover given eq condition range. 
+ + @param lower_bound_buf IN Buffer for lower bound + @param upper_bound_buf IN Buffer for upper bound + + @param outer_u +*/ +void ha_rocksdb::setup_iterator_bounds( + const Rdb_key_def &kd, const rocksdb::Slice &eq_cond, size_t bound_len, + uchar *const lower_bound, uchar *const upper_bound, + rocksdb::Slice *lower_bound_slice, rocksdb::Slice *upper_bound_slice) { + // If eq_cond is shorter than Rdb_key_def::INDEX_NUMBER_SIZE, we should be + // able to get better bounds just by using index id directly. + if (eq_cond.size() <= Rdb_key_def::INDEX_NUMBER_SIZE) { + DBUG_ASSERT(bound_len == Rdb_key_def::INDEX_NUMBER_SIZE); + uint size; + kd.get_infimum_key(lower_bound, &size); + DBUG_ASSERT(size == Rdb_key_def::INDEX_NUMBER_SIZE); + kd.get_supremum_key(upper_bound, &size); + DBUG_ASSERT(size == Rdb_key_def::INDEX_NUMBER_SIZE); + } else { + DBUG_ASSERT(bound_len <= eq_cond.size()); + memcpy(upper_bound, eq_cond.data(), bound_len); + kd.successor(upper_bound, bound_len); + memcpy(lower_bound, eq_cond.data(), bound_len); + kd.predecessor(lower_bound, bound_len); + } + + if (kd.m_is_reverse_cf) { + *upper_bound_slice = rocksdb::Slice((const char *)lower_bound, bound_len); + *lower_bound_slice = rocksdb::Slice((const char *)upper_bound, bound_len); + } else { + *upper_bound_slice = rocksdb::Slice((const char *)upper_bound, bound_len); + *lower_bound_slice = rocksdb::Slice((const char *)lower_bound, bound_len); + } +} + +/* + Open a cursor +*/ + +void ha_rocksdb::setup_scan_iterator(const Rdb_key_def &kd, + rocksdb::Slice *const slice, + const bool use_all_keys, + const uint eq_cond_len) { + DBUG_ASSERT(slice->size() >= eq_cond_len); + + Rdb_transaction *const tx = get_or_create_tx(table->in_use); + + bool skip_bloom = true; + + const rocksdb::Slice eq_cond(slice->data(), eq_cond_len); + // The size of m_scan_it_lower_bound (and upper) is technically + // max_packed_sk_len as calculated in ha_rocksdb::alloc_key_buffers. 
Rather + // than recalculating that number, we pass in the max of eq_cond_len and + // Rdb_key_def::INDEX_NUMBER_SIZE which is guaranteed to be smaller than + // max_packed_sk_len, hence ensuring no buffer overrun. + // + // See ha_rocksdb::setup_iterator_bounds on how the bound_len parameter is + // used. + if (check_bloom_and_set_bounds( + ha_thd(), kd, eq_cond, use_all_keys, + std::max(eq_cond_len, (uint)Rdb_key_def::INDEX_NUMBER_SIZE), + m_scan_it_lower_bound, m_scan_it_upper_bound, + &m_scan_it_lower_bound_slice, &m_scan_it_upper_bound_slice)) { + skip_bloom = false; + } + + /* + In some cases, setup_scan_iterator() is called multiple times from + the same query but bloom filter can not always be used. + Suppose the following query example. id2 is VARCHAR(30) and PRIMARY KEY + (id1, id2). + select count(*) from t2 WHERE id1=100 and id2 IN ('00000000000000000000', + '100'); + In this case, setup_scan_iterator() is called twice, the first time is for + (id1, id2)=(100, '00000000000000000000') and the second time is for (100, + '100'). + If prefix bloom filter length is 24 bytes, prefix bloom filter can be used + for the + first condition but not for the second condition. + If bloom filter condition is changed, currently it is necessary to destroy + and + re-create Iterator. + */ + if (m_scan_it_skips_bloom != skip_bloom) { + release_scan_iterator(); + } + + /* + SQL layer can call rnd_init() multiple times in a row. + In that case, re-use the iterator, but re-position it at the table start. 
+ */ + if (!m_scan_it) { + const bool fill_cache = !THDVAR(ha_thd(), skip_fill_cache); + if (commit_in_the_middle()) { + DBUG_ASSERT(m_scan_it_snapshot == nullptr); + m_scan_it_snapshot = rdb->GetSnapshot(); + + auto read_opts = rocksdb::ReadOptions(); + // TODO(mung): set based on WHERE conditions + read_opts.total_order_seek = true; + read_opts.snapshot = m_scan_it_snapshot; + m_scan_it = rdb->NewIterator(read_opts, kd.get_cf()); + } else { + m_scan_it = tx->get_iterator(kd.get_cf(), skip_bloom, fill_cache, + m_scan_it_lower_bound_slice, + m_scan_it_upper_bound_slice); + } + m_scan_it_skips_bloom = skip_bloom; + } +} + +void ha_rocksdb::release_scan_iterator() { + delete m_scan_it; + m_scan_it = nullptr; + + if (m_scan_it_snapshot) { + rdb->ReleaseSnapshot(m_scan_it_snapshot); + m_scan_it_snapshot = nullptr; + } +} + +void ha_rocksdb::setup_iterator_for_rnd_scan() { + uint key_size; + + int key_start_matching_bytes = + m_pk_descr->get_first_key(m_pk_packed_tuple, &key_size); + + rocksdb::Slice table_key((const char *)m_pk_packed_tuple, key_size); + + setup_scan_iterator(*m_pk_descr, &table_key, false, key_start_matching_bytes); + m_scan_it->Seek(table_key); + m_skip_scan_it_next_call = true; +} + +/** + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::rnd_init(bool scan) { + DBUG_ENTER_FUNC(); + + THD *thd = ha_thd(); + if (thd && thd->killed) { + DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED); + } + + Rdb_transaction *const tx = get_or_create_tx(table->in_use); + + // when this table is being updated, decode all fields + m_converter->setup_field_decoders(table->read_set, + m_lock_rows == RDB_LOCK_WRITE); + + if (scan) { + m_rnd_scan_is_new_snapshot = !tx->has_snapshot(); + setup_iterator_for_rnd_scan(); + } else { + /* We don't need any preparations for rnd_pos() calls. */ + } + + // If m_lock_rows is on then we will be doing a get_for_update when accessing + // the index, so don't acquire the snapshot right away. 
Otherwise acquire + // the snapshot immediately. + tx->acquire_snapshot(m_lock_rows == RDB_LOCK_NONE); + + DBUG_RETURN(HA_EXIT_SUCCESS); +} + +/** + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::rnd_next(uchar *const buf) { + DBUG_ENTER_FUNC(); + + int rc; + for (;;) { + rc = rnd_next_with_direction(buf, true); + if (!should_recreate_snapshot(rc, m_rnd_scan_is_new_snapshot)) { + break; /* exit the loop */ + } + // release the snapshot and iterator and then regenerate them + Rdb_transaction *tx = get_or_create_tx(table->in_use); + tx->release_snapshot(); + release_scan_iterator(); + setup_iterator_for_rnd_scan(); + } + + m_rnd_scan_is_new_snapshot = false; + + if (rc == HA_ERR_KEY_NOT_FOUND) rc = HA_ERR_END_OF_FILE; + + DBUG_RETURN(rc); +} + +/* + See also secondary_index_read(). +*/ +int ha_rocksdb::rnd_next_with_direction(uchar *const buf, bool move_forward) { + DBUG_ENTER_FUNC(); + + int rc; + THD *thd = ha_thd(); + + table->status = STATUS_NOT_FOUND; +#ifdef MARIAROCKS_NOT_YET + stats.rows_requested++; +#endif + if (!m_scan_it || !is_valid(m_scan_it)) { + /* + We can get here when SQL layer has called + + h->index_init(PRIMARY); + h->index_read_map(full index tuple, HA_READ_KEY_EXACT); + + In this case, we should return EOF. 
+ */ + DBUG_RETURN(HA_ERR_END_OF_FILE); + } + + for (;;) { + DEBUG_SYNC(thd, "rocksdb.check_flags_rnwd"); + if (thd && thd->killed) { + rc = HA_ERR_QUERY_INTERRUPTED; + break; + } + + if (m_skip_scan_it_next_call) { + m_skip_scan_it_next_call = false; + } else { + if (move_forward) { + m_scan_it->Next(); /* this call cannot fail */ + } else { + m_scan_it->Prev(); /* this call cannot fail */ + } + } + + if (!is_valid(m_scan_it)) { + rc = HA_ERR_END_OF_FILE; + break; + } + + /* check if we're out of this table */ + const rocksdb::Slice key = m_scan_it->key(); + if (!m_pk_descr->covers_key(key)) { + rc = HA_ERR_END_OF_FILE; + break; + } + + if (m_lock_rows != RDB_LOCK_NONE) { + /* + Lock the row we've just read. + + Now we call get_for_update which will 1) Take a lock and 2) Will fail + if the row was deleted since the snapshot was taken. + */ + Rdb_transaction *const tx = get_or_create_tx(table->in_use); + DEBUG_SYNC(ha_thd(), "rocksdb_concurrent_delete"); + + if (m_pk_descr->has_ttl() && + should_hide_ttl_rec(*m_pk_descr, m_scan_it->value(), + tx->m_snapshot_timestamp)) { + continue; + } + + const rocksdb::Status s = + get_for_update(tx, m_pk_descr->get_cf(), key, &m_retrieved_record); + if (s.IsNotFound() && + should_skip_invalidated_record(HA_ERR_KEY_NOT_FOUND)) { + continue; + } + + if (!s.ok()) { + DBUG_RETURN(tx->set_status_error(table->in_use, s, *m_pk_descr, + m_tbl_def, m_table_handler)); + } + + // If we called get_for_update() use the value from that call not from + // the iterator as it may be stale since we don't have a snapshot + // when m_lock_rows is not RDB_LOCK_NONE. 
+ m_last_rowkey.copy(key.data(), key.size(), &my_charset_bin); + rc = convert_record_from_storage_format(&key, buf); + } else { + // Use the value from the iterator + rocksdb::Slice value = m_scan_it->value(); + + if (m_pk_descr->has_ttl() && + should_hide_ttl_rec( + *m_pk_descr, value, + get_or_create_tx(table->in_use)->m_snapshot_timestamp)) { + continue; + } + + m_last_rowkey.copy(key.data(), key.size(), &my_charset_bin); + rc = convert_record_from_storage_format(&key, &value, buf); + } + + table->status = 0; + break; + } + + if (!rc) { +#ifdef MARIAROCKS_NOT_YET + stats.rows_read++; + stats.rows_index_next++; +#endif + update_row_stats(ROWS_READ); + } + + DBUG_RETURN(rc); +} + +int ha_rocksdb::rnd_end() { + DBUG_ENTER_FUNC(); + + release_scan_iterator(); + + DBUG_RETURN(HA_EXIT_SUCCESS); +} + +/** + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::index_init(uint idx, bool sorted) { + DBUG_ENTER_FUNC(); + + THD *thd = ha_thd(); + if (thd && thd->killed) { + DBUG_RETURN(HA_ERR_QUERY_INTERRUPTED); + } + + Rdb_transaction *const tx = get_or_create_tx(table->in_use); + DBUG_ASSERT(tx != nullptr); + + m_start_range= NULL; + // when this table is being updated, decode all fields + m_converter->setup_field_decoders(table->read_set, + m_lock_rows == RDB_LOCK_WRITE); + + if (!m_keyread_only) { + m_key_descr_arr[idx]->get_lookup_bitmap(table, &m_lookup_bitmap); + } + + // If m_lock_rows is not RDB_LOCK_NONE then we will be doing a get_for_update + // when accessing the index, so don't acquire the snapshot right away. + // Otherwise acquire the snapshot immediately. 
+ tx->acquire_snapshot(m_lock_rows == RDB_LOCK_NONE); + + active_index = idx; + + DBUG_RETURN(HA_EXIT_SUCCESS); +} + +/** + @return + HA_EXIT_SUCCESS OK +*/ +int ha_rocksdb::index_end() { + DBUG_ENTER_FUNC(); + + release_scan_iterator(); + + bitmap_free(&m_lookup_bitmap); + + active_index = MAX_KEY; + in_range_check_pushed_down = FALSE; + m_start_range= NULL; + + DBUG_RETURN(HA_EXIT_SUCCESS); +} + +/** + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::truncate() { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(m_tbl_def != nullptr); + + // Save table name to use later + std::string table_name = m_tbl_def->full_tablename(); + + // Delete current table + int err = delete_table(m_tbl_def); + if (err != HA_EXIT_SUCCESS) { + DBUG_RETURN(err); + } + + // Reset auto_increment_value to 1 if auto-increment feature is enabled + // By default, the starting valid value for auto_increment_value is 1 + DBUG_RETURN(create_table( + table_name, table, + table->found_next_number_field ? 1 : 0 /* auto_increment_value */)); +} + +/* + Delete the row we've last read. The row is also passed as parameter. + + @detail + The caller guarantees table buf points to the row that was just read. + The row is either table->record[0] or table->record[1]. 
+ (Check out InnoDB: row_update_for_mysql() has "UT_NOT_USED(mysql_rec)" + + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::delete_row(const uchar *const buf) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(buf != nullptr); + + set_last_rowkey(buf); + + rocksdb::Slice key_slice(m_last_rowkey.ptr(), m_last_rowkey.length()); + Rdb_transaction *const tx = get_or_create_tx(table->in_use); + ulonglong bytes_written = 0; + + const uint index = pk_index(table, m_tbl_def); + rocksdb::Status s = + delete_or_singledelete(index, tx, m_pk_descr->get_cf(), key_slice); + if (!s.ok()) { + DBUG_RETURN(tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def, + m_table_handler)); + } else { + bytes_written = key_slice.size(); + } + + longlong hidden_pk_id = 0; + if (m_tbl_def->m_key_count > 1 && has_hidden_pk(table)) { + int err = read_hidden_pk_id_from_rowkey(&hidden_pk_id); + if (err) { + DBUG_RETURN(err); + } + } + + // Delete the record for every secondary index + for (uint i = 0; i < m_tbl_def->m_key_count; i++) { + if (!is_pk(i, table, m_tbl_def)) { + int packed_size; + const Rdb_key_def &kd = *m_key_descr_arr[i]; + packed_size = kd.pack_record(table, m_pack_buffer, buf, m_sk_packed_tuple, + nullptr, false, hidden_pk_id); + rocksdb::Slice secondary_key_slice( + reinterpret_cast(m_sk_packed_tuple), packed_size); + /* Deleting on secondary key doesn't need any locks: */ + tx->get_indexed_write_batch()->SingleDelete(kd.get_cf(), + secondary_key_slice); + bytes_written += secondary_key_slice.size(); + } + } + + tx->incr_delete_count(); + tx->log_table_write_op(m_tbl_def); + + if (do_bulk_commit(tx)) { + DBUG_RETURN(HA_ERR_ROCKSDB_BULK_LOAD); + } +#ifdef MARIAROCKS_NOT_YET + stats.rows_deleted++; +#endif + update_row_stats(ROWS_DELETED); + tx->update_bytes_written(bytes_written); + + DBUG_RETURN(HA_EXIT_SUCCESS); +} + +rocksdb::Status ha_rocksdb::delete_or_singledelete( + uint index, Rdb_transaction *const tx, + 
rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key) { + const bool assume_tracked = can_assume_tracked(ha_thd()); + if (can_use_single_delete(index)) { + return tx->single_delete(column_family, key, assume_tracked); + } + return tx->delete_key(column_family, key, assume_tracked); +} + +void ha_rocksdb::update_stats(void) { + DBUG_ENTER_FUNC(); + + stats.records = 0; + stats.index_file_length = 0ul; + stats.data_file_length = 0ul; + stats.mean_rec_length = 0; + + for (uint i = 0; i < m_tbl_def->m_key_count; i++) { + if (is_pk(i, table, m_tbl_def)) { + stats.data_file_length = m_pk_descr->m_stats.m_actual_disk_size; + stats.records = m_pk_descr->m_stats.m_rows; + } else { + stats.index_file_length += m_key_descr_arr[i]->m_stats.m_actual_disk_size; + } + } + + DBUG_VOID_RETURN; +} + +/** + @return + HA_EXIT_SUCCESS OK + HA_EXIT_FAILURE Error +*/ +int ha_rocksdb::info(uint flag) { + DBUG_ENTER_FUNC(); + + if (!table) { + DBUG_RETURN(HA_EXIT_FAILURE); + } + + if (flag & HA_STATUS_VARIABLE) { + /* + Test only to simulate corrupted stats + */ + DBUG_EXECUTE_IF("myrocks_simulate_negative_stats", + m_pk_descr->m_stats.m_actual_disk_size = + -m_pk_descr->m_stats.m_actual_disk_size;); + + update_stats(); + + /* + If any stats are negative due to bad cached stats, re-run analyze table + and re-retrieve the stats. 
+ */ + if (static_cast(stats.data_file_length) < 0 || + static_cast(stats.index_file_length) < 0 || + static_cast(stats.records) < 0) { + if (calculate_stats_for_table()) { + DBUG_RETURN(HA_EXIT_FAILURE); + } + + update_stats(); + } + + // if number of records is hardcoded, we do not want to force computation + // of memtable cardinalities + if (stats.records == 0 || (rocksdb_force_compute_memtable_stats && + rocksdb_debug_optimizer_n_rows == 0)) { + // First, compute SST files stats + uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]; + auto r = get_range(pk_index(table, m_tbl_def), buf); + uint64_t sz = 0; + uint8_t include_flags = rocksdb::DB::INCLUDE_FILES; + // recompute SST files stats only if records count is 0 + if (stats.records == 0) { + rdb->GetApproximateSizes(m_pk_descr->get_cf(), &r, 1, &sz, + include_flags); + stats.records += sz / ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE; + stats.data_file_length += sz; + } + // Second, compute memtable stats. This call is expensive, so cache + // values computed for some time. + uint64_t cachetime = rocksdb_force_compute_memtable_stats_cachetime; + uint64_t time = (cachetime == 0) ? 
0 : my_interval_timer() / 1000; + if (cachetime == 0 || + time > m_table_handler->m_mtcache_last_update + cachetime) { + uint64_t memtableCount; + uint64_t memtableSize; + + // the stats below are calculated from skiplist wich is a probablistic + // data structure, so the results vary between test runs + // it also can return 0 for quite a large tables which means that + // cardinality for memtable only indxes will be reported as 0 + rdb->GetApproximateMemTableStats(m_pk_descr->get_cf(), r, + &memtableCount, &memtableSize); + + // Atomically update all of these fields at the same time + if (cachetime > 0) { + if (m_table_handler->m_mtcache_lock.fetch_add( + 1, std::memory_order_acquire) == 0) { + m_table_handler->m_mtcache_count = memtableCount; + m_table_handler->m_mtcache_size = memtableSize; + m_table_handler->m_mtcache_last_update = time; + } + m_table_handler->m_mtcache_lock.fetch_sub(1, + std::memory_order_release); + } + + stats.records += memtableCount; + stats.data_file_length += memtableSize; + } else { + // Cached data is still valid, so use it instead + stats.records += m_table_handler->m_mtcache_count; + stats.data_file_length += m_table_handler->m_mtcache_size; + } + + // Do like InnoDB does. 
stats.records=0 confuses the optimizer + if (stats.records == 0 && !(flag & (HA_STATUS_TIME | HA_STATUS_OPEN))) { + stats.records++; + } + } + + if (rocksdb_debug_optimizer_n_rows > 0) + stats.records = rocksdb_debug_optimizer_n_rows; + + if (stats.records != 0) { + stats.mean_rec_length = stats.data_file_length / stats.records; + } + } + + if (flag & HA_STATUS_CONST) { + ref_length = m_pk_descr->max_storage_fmt_length(); + + for (uint i = 0; i < m_tbl_def->m_key_count; i++) { + if (is_hidden_pk(i, table, m_tbl_def)) { + continue; + } + KEY *const k = &table->key_info[i]; + for (uint j = 0; j < k->ext_key_parts; j++) { + const Rdb_index_stats &k_stats = m_key_descr_arr[i]->m_stats; + uint x; + + if (k_stats.m_distinct_keys_per_prefix.size() > j && + k_stats.m_distinct_keys_per_prefix[j] > 0) { + x = k_stats.m_rows / k_stats.m_distinct_keys_per_prefix[j]; + /* + If the number of rows is less than the number of prefixes (due to + sampling), the average number of rows with the same prefix is 1. + */ + if (x == 0) { + x = 1; + } + } else { + x = 0; + } + if (x > stats.records) x = stats.records; + if ((x == 0 && rocksdb_debug_optimizer_no_zero_cardinality) || + rocksdb_debug_optimizer_n_rows > 0) { + // Fake cardinality implementation. For example, (idx1, idx2, idx3) + // index + /* + Make MariaRocks behave the same way as MyRocks does: + 1. SQL layer thinks that unique secondary indexes are not extended + with PK columns (both in MySQL and MariaDB) + 2. MariaDB also thinks that indexes with partially-covered columns + are not extended with PK columns. Use the same number of + keyparts that MyRocks would use. + */ + uint ext_key_parts2; + if (k->flags & HA_NOSAME) + ext_key_parts2= k->ext_key_parts; // This is #1 + else + ext_key_parts2= m_key_descr_arr[i]->get_key_parts(); // This is #2. + + // will have rec_per_key for (idx1)=4, (idx1,2)=2, and (idx1,2,3)=1. 
+ // rec_per_key for the whole index is 1, and multiplied by 2^n if + // n suffix columns of the index are not used. + x = 1 << (ext_key_parts2 - j - 1); + } + k->rec_per_key[j] = x; + } + } + + stats.create_time = m_tbl_def->get_create_time(); + } + + if (flag & HA_STATUS_TIME) { + stats.update_time = m_tbl_def->m_update_time; + } + + if (flag & HA_STATUS_ERRKEY) { + /* + Currently we support only primary keys so we know which key had a + uniqueness violation. + */ + errkey = m_dupp_errkey; + dup_ref = m_pk_tuple; // TODO(?): this should store packed PK. + } + + if (flag & HA_STATUS_AUTO) { + stats.auto_increment_value = m_tbl_def->m_auto_incr_val; + } + + DBUG_RETURN(HA_EXIT_SUCCESS); +} + +void ha_rocksdb::position(const uchar *const record) { + DBUG_ENTER_FUNC(); + + longlong hidden_pk_id = 0; + if (has_hidden_pk(table) && read_hidden_pk_id_from_rowkey(&hidden_pk_id)) { + DBUG_ASSERT(false); // should never reach here + } + + /* + Get packed primary key value from the record. + + (Note: m_last_rowkey has the packed PK of last-read-row, which allows to + handle most cases, but there is an exception to this: when slave applies + RBR events, it fills the record and then calls position(); rnd_pos()) + + Also note that we *can* rely on PK column values being available. This is + because table_flags() includes HA_PRIMARY_KEY_REQUIRED_FOR_POSITION bit. + When that is true, table->prepare_for_position() adds PK columns into the + read set (this may potentially disable index-only access when PK column + cannot be restored from its mem-comparable form in the secondary indexes). + */ + const uint packed_size = m_pk_descr->pack_record( + table, m_pack_buffer, record, ref, nullptr, false, hidden_pk_id); + + /* + It could be that mem-comparable form of PK occupies less than ref_length + bytes. Fill the remainder with zeros. 
+ */ + if (ref_length > packed_size) { + memset(ref + packed_size, 0, ref_length - packed_size); + } + + DBUG_VOID_RETURN; +} + +/** + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::rnd_pos(uchar *const buf, uchar *const pos) { + DBUG_ENTER_FUNC(); + + int rc; + size_t len; + +#ifdef MARIAROCKS_NOT_YET + stats.rows_requested++; +#endif + len = m_pk_descr->key_length(table, + rocksdb::Slice((const char *)pos, ref_length)); + if (len == size_t(-1)) { + DBUG_RETURN(HA_ERR_ROCKSDB_CORRUPT_DATA); /* Data corruption? */ + } + + rc = get_row_by_rowid(buf, pos, len); + + if (!rc) { +#ifdef MARIAROCKS_NOT_YET + stats.rows_read++; +#endif + update_row_stats(ROWS_READ); + } + + DBUG_RETURN(rc); +} + +/* + @brief + Calculate (if needed) the bitmap of indexes that are modified by the + current query + + @detail + The calculation done by checking index definitions against the + table->write_set + column bitmap. +*/ + +void ha_rocksdb::calc_updated_indexes() { + if (!m_update_scope_is_valid) { + m_update_scope_is_valid = true; + m_update_scope.clear_all(); + + for (uint keynr = 0; keynr < table->s->keys; keynr++) { + const Rdb_key_def &kd = *m_key_descr_arr[keynr]; + /* Walk over all key parts, including the "extended key" suffix */ + const uint key_parts = kd.get_key_parts(); + for (uint kp = 0; kp < key_parts; kp++) { + if (has_hidden_pk(table) && kp + 1 == key_parts) break; + + Field *const field = kd.get_table_field_for_part_no(table, kp); + if (bitmap_is_set(table->write_set, field->field_index)) { + m_update_scope.set_bit(keynr); + break; + } + } + } + } +} + +/** + Update an existing row + @param[in] old_data nullptr for write, non-null for update + @param[in] new_data non-null for write/update + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::update_row(const uchar *const old_data, + const uchar *const new_data) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(old_data != nullptr); + 
DBUG_ASSERT(new_data != nullptr); + DBUG_ASSERT(m_lock_rows == RDB_LOCK_WRITE); + /* + old_data points to record we're updating. It is the same as the record + we've just read (for multi-table UPDATE, too, because SQL layer will make + an rnd_pos() call to re-read the record before calling update_row()) + */ + DBUG_ASSERT(new_data == table->record[0]); + + const int rv = update_write_row(old_data, new_data, skip_unique_check()); + + if (rv == 0) { +#ifdef MARIAROCKS_NOT_YET + stats.rows_updated++; +#endif + update_row_stats(ROWS_UPDATED); + } + + DBUG_RETURN(rv); +} + +/* + MariaDB's temporary: MyRocks has this function in sql/handler.cc: +*/ + +bool can_hold_read_locks_on_select(THD *thd, thr_lock_type lock_type) +{ + return (lock_type == TL_READ_WITH_SHARED_LOCKS + || lock_type == TL_READ_NO_INSERT + || (lock_type != TL_IGNORE + && thd->lex->sql_command != SQLCOM_SELECT)); +} + + +/* The following function was copied from ha_blackhole::store_lock: */ +THR_LOCK_DATA **ha_rocksdb::store_lock(THD *const thd, THR_LOCK_DATA **to, + enum thr_lock_type lock_type) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(thd != nullptr); + DBUG_ASSERT(to != nullptr); + + bool in_lock_tables = my_core::thd_in_lock_tables(thd); + + /* First, make a decision about MyRocks's internal locking */ + if (lock_type >= TL_WRITE_ALLOW_WRITE) { + m_lock_rows = RDB_LOCK_WRITE; + } else if (lock_type == TL_READ_WITH_SHARED_LOCKS) { + m_lock_rows = RDB_LOCK_READ; + } else if (lock_type != TL_IGNORE) { + m_lock_rows = RDB_LOCK_NONE; + if (THDVAR(thd, lock_scanned_rows)) { + /* + The following logic was copied directly from + ha_innobase::store_lock_with_x_type() in + storage/innobase/handler/ha_innodb.cc and causes MyRocks to leave + locks in place on rows that are in a table that is not being updated. 
+ */ + const uint sql_command = my_core::thd_sql_command(thd); + if ((lock_type == TL_READ && in_lock_tables) || + (lock_type == TL_READ_HIGH_PRIORITY && in_lock_tables) || + can_hold_read_locks_on_select(thd, lock_type)) { + ulong tx_isolation = my_core::thd_tx_isolation(thd); + if (sql_command != SQLCOM_CHECKSUM && + ((my_core::thd_test_options(thd, OPTION_BIN_LOG) && + tx_isolation > ISO_READ_COMMITTED) || + tx_isolation == ISO_SERIALIZABLE || + (lock_type != TL_READ && lock_type != TL_READ_NO_INSERT) || + (sql_command != SQLCOM_INSERT_SELECT && + sql_command != SQLCOM_REPLACE_SELECT && + sql_command != SQLCOM_UPDATE && sql_command != SQLCOM_DELETE && + sql_command != SQLCOM_CREATE_TABLE))) { + m_lock_rows = RDB_LOCK_READ; + } + } + } + } + + /* Then, tell the SQL layer what kind of locking it should use: */ + if (lock_type != TL_IGNORE && m_db_lock.type == TL_UNLOCK) { + /* + Here is where we get into the guts of a row level lock. + If TL_UNLOCK is set + If we are not doing a LOCK TABLE or DISCARD/IMPORT + TABLESPACE, then allow multiple writers + */ + + if ((lock_type >= TL_WRITE_CONCURRENT_INSERT && lock_type <= TL_WRITE) && + !in_lock_tables && !my_core::thd_tablespace_op(thd)) { + lock_type = TL_WRITE_ALLOW_WRITE; + } + + /* + In queries of type INSERT INTO t1 SELECT ... FROM t2 ... + MySQL would use the lock TL_READ_NO_INSERT on t2, and that + would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts + to t2. Convert the lock to a normal read lock to allow + concurrent inserts to t2. 
+ */ + + if (lock_type == TL_READ_NO_INSERT && !in_lock_tables) { + lock_type = TL_READ; + } + + m_db_lock.type = lock_type; + } + + *to++ = &m_db_lock; + + DBUG_RETURN(to); +} + +void ha_rocksdb::read_thd_vars(THD *const thd) { + m_store_row_debug_checksums = THDVAR(thd, store_row_debug_checksums); + m_converter->set_verify_row_debug_checksums( + THDVAR(thd, verify_row_debug_checksums)); + m_checksums_pct = THDVAR(thd, checksums_pct); +} + +ulonglong ha_rocksdb::table_flags() const +{ + DBUG_ENTER_FUNC(); + + /* + HA_BINLOG_STMT_CAPABLE + Upstream: MyRocks advertises itself as it supports SBR, but has additional + checks in ha_rocksdb::external_lock()/ start_stmt() which will return an + error if one tries to run the statement. + Exceptions: @@rocksdb_unsafe_for_binlog or we are an SQL slave thread. + + MariaDB: Inform the upper layer we don't support SBR, so it switches to RBR + if possible. The exceptions are the same as with the upstream. + + HA_REC_NOT_IN_SEQ + If we don't set it, filesort crashes, because it assumes rowids are + 1..8 byte numbers + HA_PRIMARY_KEY_IN_READ_INDEX + This flag is always set, even for tables that: + - have no PK + - have some (or all) of PK that can't be decoded from the secondary + index. + */ + THD *thd= ha_thd(); + DBUG_RETURN(HA_BINLOG_ROW_CAPABLE | + ((thd && (THDVAR(thd, unsafe_for_binlog) ||thd->rgi_slave))? 
+ HA_BINLOG_STMT_CAPABLE : 0) | + HA_REC_NOT_IN_SEQ | HA_CAN_INDEX_BLOBS | + HA_PRIMARY_KEY_IN_READ_INDEX | + HA_PRIMARY_KEY_REQUIRED_FOR_POSITION | HA_NULL_IN_KEY | + HA_PARTIAL_COLUMN_READ | + HA_TABLE_SCAN_ON_INDEX); +} + + +/** + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (cannot be SE-specific) +*/ +int ha_rocksdb::external_lock(THD *const thd, int lock_type) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(thd != nullptr); + + int res = HA_EXIT_SUCCESS; +#if 0 + // MariaDB uses a different way to implement this, see ha_rocksdb::table_flags + + int binlog_format = my_core::thd_binlog_format(thd); + bool unsafe_for_binlog = THDVAR(ha_thd(), unsafe_for_binlog); + + /* + A note about (*) below: In fb/mysql, LOCK TABLE myrocks_table WRITE + - produces an error when @@binlog_format='ROW'. + - does not produce an error with @@binlog_format='STATEMENT' + + We cannot easily provide this behavior in MariaDB, because LOCK ROWS + changes @@binlog_format for the duration of the statement. + + Instead we will make the check in ha_rocksdb::start_stmt and will produce + an error in the first DML statement done under LOCK TABLE + + (**) - The same happens for other "DDL-type" commands, so allow them here + as well. 
+ */ + if (lock_type == F_WRLCK && !thd->rgi_slave && !unsafe_for_binlog && + binlog_format != BINLOG_FORMAT_ROW && + binlog_format != BINLOG_FORMAT_UNSPEC && + thd->lex->sql_command != SQLCOM_LOCK_TABLES && // (*) + thd->lex->sql_command != SQLCOM_ANALYZE && // (**) + thd->lex->sql_command != SQLCOM_OPTIMIZE && // (**) + thd->lex->sql_command != SQLCOM_FLUSH && // (**) + my_core::thd_binlog_filter_ok(thd)) { + my_error(ER_REQUIRE_ROW_BINLOG_FORMAT, MYF(0)); + DBUG_RETURN(HA_ERR_UNSUPPORTED); + } +#endif + + if (lock_type == F_UNLCK) { + Rdb_transaction *const tx = get_tx_from_thd(thd); + + if (tx) { + tx->io_perf_end_and_record(&m_io_perf); + tx->m_n_mysql_tables_in_use--; + if (tx->m_n_mysql_tables_in_use == 0 && + !my_core::thd_test_options(thd, + OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { + /* + Do like InnoDB: when we get here, it's time to commit a + single-statement transaction. + + If the statement involved multiple tables, this code will be executed + for each of them, but that's ok because non-first tx->commit() calls + will be no-ops. + */ + if (tx->commit_or_rollback()) { + res = HA_ERR_INTERNAL_ERROR; + } + } + } + } else { + if (my_core::thd_tx_isolation(thd) < ISO_READ_COMMITTED || + my_core::thd_tx_isolation(thd) > ISO_REPEATABLE_READ) { + my_error(ER_ISOLATION_MODE_NOT_SUPPORTED, MYF(0), + tx_isolation_names[my_core::thd_tx_isolation(thd)]); + DBUG_RETURN(HA_ERR_UNSUPPORTED); + } + /* + It's nice to do the following on start of every statement. The problem + is, handler->start_stmt() is not called for INSERTs. + So, we put this code here. 
+ */ + Rdb_transaction *const tx = get_or_create_tx(thd); + read_thd_vars(thd); + + m_update_scope_is_valid = false; + + if (skip_unique_check()) { + if ((thd->lex->sql_command == SQLCOM_INSERT || + thd->lex->sql_command == SQLCOM_LOAD || + thd->lex->sql_command == SQLCOM_REPLACE) && + (thd->lex->duplicates == DUP_REPLACE || + thd->lex->duplicates == DUP_UPDATE)) { + my_error(ER_ON_DUPLICATE_DISABLED, MYF(0), thd->query()); + DBUG_RETURN(HA_ERR_UNSUPPORTED); + } + } + + if (lock_type == F_WRLCK) { + if (tx->is_tx_read_only()) { + my_error(ER_UPDATES_WITH_CONSISTENT_SNAPSHOT, MYF(0)); + DBUG_RETURN(HA_ERR_UNSUPPORTED); + } + +#ifdef MARIADB_NOT_YET + if (thd->get_explicit_snapshot()) { + my_error(ER_UPDATES_WITH_EXPLICIT_SNAPSHOT, MYF(0)); + DBUG_RETURN(HA_ERR_UNSUPPORTED); + } +#endif + + /* + SQL layer signals us to take a write lock. It does so when starting DML + statement. We should put locks on the rows we're reading. + + Note: sometimes, external_lock() can be called without a prior + ::store_lock call. That's why we need to set lock_* members here, too. + */ + m_lock_rows = RDB_LOCK_WRITE; + + if (thd->lex->sql_command == SQLCOM_CREATE_INDEX || + thd->lex->sql_command == SQLCOM_DROP_INDEX || + thd->lex->sql_command == SQLCOM_ALTER_TABLE) { + tx->m_ddl_transaction = true; + } + } + tx->m_n_mysql_tables_in_use++; + rocksdb_register_tx(rocksdb_hton, thd, tx); + tx->io_perf_start(&m_io_perf); + } + + DBUG_RETURN(res); +} + +/** + @note + A quote from ha_innobase::start_stmt(): + + MySQL calls this function at the start of each SQL statement inside LOCK + TABLES. Inside LOCK TABLES the ::external_lock method does not work to + mark SQL statement borders. 
+ + + @return + HA_EXIT_SUCCESS OK +*/ + +int ha_rocksdb::start_stmt(THD *const thd, thr_lock_type lock_type) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(thd != nullptr); + + Rdb_transaction *const tx = get_or_create_tx(thd); + read_thd_vars(thd); + rocksdb_register_tx(ht, thd, tx); + tx->io_perf_start(&m_io_perf); + + DBUG_RETURN(HA_EXIT_SUCCESS); +} + +rocksdb::Range get_range(uint32_t i, + uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2], + int offset1, int offset2) { + uchar *buf_begin = buf; + uchar *buf_end = buf + Rdb_key_def::INDEX_NUMBER_SIZE; + rdb_netbuf_store_index(buf_begin, i + offset1); + rdb_netbuf_store_index(buf_end, i + offset2); + + return rocksdb::Range( + rocksdb::Slice((const char *)buf_begin, Rdb_key_def::INDEX_NUMBER_SIZE), + rocksdb::Slice((const char *)buf_end, Rdb_key_def::INDEX_NUMBER_SIZE)); +} + +static rocksdb::Range get_range(const Rdb_key_def &kd, + uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2], + int offset1, int offset2) { + return get_range(kd.get_index_number(), buf, offset1, offset2); +} + +rocksdb::Range get_range(const Rdb_key_def &kd, + uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]) { + if (kd.m_is_reverse_cf) { + return myrocks::get_range(kd, buf, 1, 0); + } else { + return myrocks::get_range(kd, buf, 0, 1); + } +} + +rocksdb::Range ha_rocksdb::get_range( + const int i, uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]) const { + return myrocks::get_range(*m_key_descr_arr[i], buf); +} + +/* + This function is called with total_order_seek=true, but + upper/lower bound setting is not necessary. + Boundary set is useful when there is no matching key, + but in drop_index_thread's case, it means index is marked as removed, + so no further seek will happen for the index id. 
+*/ +static bool is_myrocks_index_empty(rocksdb::ColumnFamilyHandle *cfh, + const bool is_reverse_cf, + const rocksdb::ReadOptions &read_opts, + const uint index_id) { + bool index_removed = false; + uchar key_buf[Rdb_key_def::INDEX_NUMBER_SIZE] = {0}; + rdb_netbuf_store_uint32(key_buf, index_id); + const rocksdb::Slice key = + rocksdb::Slice(reinterpret_cast(key_buf), sizeof(key_buf)); + std::unique_ptr it(rdb->NewIterator(read_opts, cfh)); + rocksdb_smart_seek(is_reverse_cf, it.get(), key); + if (!it->Valid()) { + index_removed = true; + } else { + if (memcmp(it->key().data(), key_buf, Rdb_key_def::INDEX_NUMBER_SIZE)) { + // Key does not have same prefix + index_removed = true; + } + } + return index_removed; +} + +/* + Drop index thread's main logic +*/ + +void Rdb_drop_index_thread::run() { + RDB_MUTEX_LOCK_CHECK(m_signal_mutex); + + for (;;) { + // The stop flag might be set by shutdown command + // after drop_index_thread releases signal_mutex + // (i.e. while executing expensive Seek()). To prevent drop_index_thread + // from entering long cond_timedwait, checking if stop flag + // is true or not is needed, with drop_index_interrupt_mutex held. + if (m_stop) { + break; + } + + timespec ts; + int sec= dict_manager.is_drop_index_empty() + ? 
24 * 60 * 60 // no filtering + : 60; // filtering + set_timespec(ts,sec); + + const auto ret MY_ATTRIBUTE((__unused__)) = + mysql_cond_timedwait(&m_signal_cond, &m_signal_mutex, &ts); + if (m_stop) { + break; + } + // make sure, no program error is returned + DBUG_ASSERT(ret == 0 || ret == ETIMEDOUT); + RDB_MUTEX_UNLOCK_CHECK(m_signal_mutex); + + std::unordered_set indices; + dict_manager.get_ongoing_drop_indexes(&indices); + if (!indices.empty()) { + std::unordered_set finished; + rocksdb::ReadOptions read_opts; + read_opts.total_order_seek = true; // disable bloom filter + + for (const auto d : indices) { + uint32 cf_flags = 0; + if (!dict_manager.get_cf_flags(d.cf_id, &cf_flags)) { + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: Failed to get column family flags " + "from cf id %u. MyRocks data dictionary may " + "get corrupted.", + d.cf_id); + abort(); + } + rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(d.cf_id); + DBUG_ASSERT(cfh); + const bool is_reverse_cf = cf_flags & Rdb_key_def::REVERSE_CF_FLAG; + + uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]; + rocksdb::Range range = get_range(d.index_id, buf, is_reverse_cf ? 1 : 0, + is_reverse_cf ? 
0 : 1); + rocksdb::Status status = DeleteFilesInRange(rdb->GetBaseDB(), cfh, + &range.start, &range.limit); + if (!status.ok()) { + if (status.IsShutdownInProgress()) { + break; + } + rdb_handle_io_error(status, RDB_IO_ERROR_BG_THREAD); + } + status = rdb->CompactRange(getCompactRangeOptions(), cfh, &range.start, + &range.limit); + if (!status.ok()) { + if (status.IsShutdownInProgress()) { + break; + } + rdb_handle_io_error(status, RDB_IO_ERROR_BG_THREAD); + } + if (is_myrocks_index_empty(cfh, is_reverse_cf, read_opts, d.index_id)) { + finished.insert(d); + } + } + + if (!finished.empty()) { + dict_manager.finish_drop_indexes(finished); + } + } + RDB_MUTEX_LOCK_CHECK(m_signal_mutex); + } + + RDB_MUTEX_UNLOCK_CHECK(m_signal_mutex); +} + +Rdb_tbl_def *ha_rocksdb::get_table_if_exists(const char *const tablename) { + std::string str; + if (rdb_normalize_tablename(tablename, &str) != HA_EXIT_SUCCESS) { + // We were not passed table name? + DBUG_ASSERT(0); + return nullptr; + } + + return ddl_manager.find(str); +} + +/* + Overload func for delete table ---it deletes table meta data in data + dictionary immediately and delete real data in background thread(async) + + @param tbl IN MyRocks table definition + + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ +int ha_rocksdb::delete_table(Rdb_tbl_def *const tbl) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(tbl != nullptr); + DBUG_ASSERT(m_tbl_def == nullptr || m_tbl_def == tbl); + + const std::unique_ptr wb = dict_manager.begin(); + rocksdb::WriteBatch *const batch = wb.get(); + + dict_manager.add_drop_table(tbl->m_key_descr_arr, tbl->m_key_count, batch); + + /* + Remove the table entry in data dictionary (this will also remove it from + the persistent data dictionary). 
+ */ + ddl_manager.remove(tbl, batch, true); + + int err = dict_manager.commit(batch); + if (err) { + DBUG_RETURN(err); + } + + rdb_drop_idx_thread.signal(); + // avoid dangling pointer + m_tbl_def = nullptr; + DBUG_RETURN(HA_EXIT_SUCCESS); +} + +/* + Note: the following function is called when the table is not open. That is, + this->table==nullptr, pk_key_descr==nullptr, etc. + + tablename points to line in form "./dbname/tablename". + + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (can be SE-specific) +*/ + +int ha_rocksdb::delete_table(const char *const tablename) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(tablename != nullptr); + + /* Find the table in the hash */ + Rdb_tbl_def *const tbl = get_table_if_exists(tablename); + if (!tbl) { + DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); + } + + DBUG_RETURN(delete_table(tbl)); +} + +int ha_rocksdb::remove_rows(Rdb_tbl_def *const tbl) { + const rocksdb::WriteOptions wo = + rdb_get_rocksdb_write_options(handler::ha_thd()); + + rocksdb::ReadOptions opts; + opts.total_order_seek = true; + Rdb_transaction *const tx = get_or_create_tx(table->in_use); + + char key_buf[MAX_KEY_LENGTH]; + uint key_len; + ulonglong bytes_written = 0; + + uchar lower_bound_buf[Rdb_key_def::INDEX_NUMBER_SIZE]; + uchar upper_bound_buf[Rdb_key_def::INDEX_NUMBER_SIZE]; + rocksdb::Slice lower_bound_slice; + rocksdb::Slice upper_bound_slice; + + /* + Remove all records in each index. 
+ (This is is not crash-safe, but it doesn't matter, because bulk row + deletion will be handled on rocksdb side) + */ + for (uint i = 0; i < tbl->m_key_count; i++) { + const Rdb_key_def &kd = *tbl->m_key_descr_arr[i]; + kd.get_infimum_key(reinterpret_cast(key_buf), &key_len); + rocksdb::ColumnFamilyHandle *cf = kd.get_cf(); + const rocksdb::Slice table_key(key_buf, key_len); + setup_iterator_bounds(kd, table_key, Rdb_key_def::INDEX_NUMBER_SIZE, + lower_bound_buf, upper_bound_buf, &lower_bound_slice, + &upper_bound_slice); + DBUG_ASSERT(key_len == Rdb_key_def::INDEX_NUMBER_SIZE); + opts.iterate_lower_bound = &lower_bound_slice; + opts.iterate_upper_bound = &upper_bound_slice; + std::unique_ptr it(rdb->NewIterator(opts, cf)); + + it->Seek(table_key); + while (it->Valid()) { + const rocksdb::Slice key = it->key(); + if (!kd.covers_key(key)) { + break; + } + + rocksdb::Status s; + if (can_use_single_delete(i)) { + s = rdb->SingleDelete(wo, cf, key); + } else { + s = rdb->Delete(wo, cf, key); + } + + if (!s.ok()) { + return tx->set_status_error(table->in_use, s, *m_pk_descr, m_tbl_def, + m_table_handler); + } + bytes_written += key.size(); + it->Next(); + } + } + + tx->update_bytes_written(bytes_written); + + return HA_EXIT_SUCCESS; +} + +/** + @return + HA_EXIT_SUCCESS OK + other HA_ERR error code (cannot be SE-specific) +*/ +int ha_rocksdb::rename_table(const char *const from, const char *const to) { + DBUG_ENTER_FUNC(); + + std::string from_str; + std::string to_str; + std::string from_db; + std::string to_db; + + int rc = rdb_normalize_tablename(from, &from_str); + if (rc != HA_EXIT_SUCCESS) { + DBUG_RETURN(rc); + } + + rc = rdb_split_normalized_tablename(from_str, &from_db); + if (rc != HA_EXIT_SUCCESS) { + DBUG_RETURN(rc); + } + + rc = rdb_normalize_tablename(to, &to_str); + if (rc != HA_EXIT_SUCCESS) { + DBUG_RETURN(rc); + } + + rc = rdb_split_normalized_tablename(to_str, &to_db); + if (rc != HA_EXIT_SUCCESS) { + DBUG_RETURN(rc); + } + + // If the user changed 
the database part of the name then validate that the + // 'to' database exists. + if (from_db != to_db && !rdb_database_exists(to_db)) { + // If we return a RocksDB specific error code here we get + // "error: 206 - Unknown error 206". InnoDB gets + // "error -1 - Unknown error -1" so let's match them. + DBUG_RETURN(-1); + } + + DBUG_EXECUTE_IF("gen_sql_table_name", to_str = to_str + "#sql-test";); + + const std::unique_ptr wb = dict_manager.begin(); + rocksdb::WriteBatch *const batch = wb.get(); + dict_manager.lock(); + + if (ddl_manager.rename(from_str, to_str, batch)) { + rc = HA_ERR_NO_SUCH_TABLE; + } else { + rc = dict_manager.commit(batch); + } + dict_manager.unlock(); + + DBUG_RETURN(rc); +} + +/** + check_if_incompatible_data() called if ALTER TABLE can't detect otherwise + if new and old definition are compatible + + @details If there are no other explicit signs like changed number of + fields this function will be called by compare_tables() + (sql/sql_tables.cc) to decide should we rewrite whole table or only .frm + file. + +*/ + +bool ha_rocksdb::check_if_incompatible_data(HA_CREATE_INFO *const info, + uint table_changes) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(info != nullptr); + + // this function is needed only for online alter-table + DBUG_RETURN(COMPATIBLE_DATA_NO); +} + +/** + @return + HA_EXIT_SUCCESS OK +*/ +int ha_rocksdb::extra(enum ha_extra_function operation) { + DBUG_ENTER_FUNC(); + + switch (operation) { + case HA_EXTRA_KEYREAD: + m_keyread_only = true; + break; + case HA_EXTRA_NO_KEYREAD: + m_keyread_only = false; + break; + case HA_EXTRA_FLUSH: + /* + If the table has blobs, then they are part of m_retrieved_record. + This call invalidates them. 
+ */ + m_retrieved_record.Reset(); + break; + case HA_EXTRA_INSERT_WITH_UPDATE: + // INSERT ON DUPLICATE KEY UPDATE + if (rocksdb_enable_insert_with_update_caching) { + m_insert_with_update = true; + } + break; + case HA_EXTRA_NO_IGNORE_DUP_KEY: + // PAIRED with HA_EXTRA_INSERT_WITH_UPDATE or HA_EXTRA_WRITE_CAN_REPLACE + // that indicates the end of REPLACE / INSERT ON DUPLICATE KEY + m_insert_with_update = false; + break; + + default: + break; + } + + DBUG_RETURN(HA_EXIT_SUCCESS); +} + +/* + Given a starting key and an ending key, estimate the number of rows that + will exist between the two keys. +*/ +ha_rows ha_rocksdb::records_in_range(uint inx, const key_range *const min_key, + const key_range *const max_key, + page_range *pages) { + DBUG_ENTER_FUNC(); + + ha_rows ret = THDVAR(ha_thd(), records_in_range); + if (ret) { + DBUG_RETURN(ret); + } + if (table->force_index) { + const ha_rows force_rows = THDVAR(ha_thd(), force_index_records_in_range); + if (force_rows) { + DBUG_RETURN(force_rows); + } + } + + const Rdb_key_def &kd = *m_key_descr_arr[inx]; + + uint size1 = 0; + if (min_key) { + size1 = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple, + m_record_buffer, + min_key->key, min_key->keypart_map); + if (min_key->flag == HA_READ_PREFIX_LAST_OR_PREV || + min_key->flag == HA_READ_PREFIX_LAST || + min_key->flag == HA_READ_AFTER_KEY) { + kd.successor(m_sk_packed_tuple, size1); + } + } else { + kd.get_infimum_key(m_sk_packed_tuple, &size1); + } + + uint size2 = 0; + if (max_key) { + size2 = kd.pack_index_tuple(table, m_pack_buffer, m_sk_packed_tuple_old, + m_record_buffer, + max_key->key, max_key->keypart_map); + if (max_key->flag == HA_READ_PREFIX_LAST_OR_PREV || + max_key->flag == HA_READ_PREFIX_LAST || + max_key->flag == HA_READ_AFTER_KEY) { + kd.successor(m_sk_packed_tuple_old, size2); + } + // pad the upper key with FFFFs to make sure it is more than the lower + if (size1 > size2) { + memset(m_sk_packed_tuple_old + size2, 0xff, size1 - size2); + 
size2 = size1; + } + } else { + kd.get_supremum_key(m_sk_packed_tuple_old, &size2); + } + + const rocksdb::Slice slice1((const char *)m_sk_packed_tuple, size1); + const rocksdb::Slice slice2((const char *)m_sk_packed_tuple_old, size2); + + // slice1 >= slice2 means no row will match + if (slice1.compare(slice2) >= 0) { + DBUG_RETURN(HA_EXIT_SUCCESS); + } + + rocksdb::Range r(kd.m_is_reverse_cf ? slice2 : slice1, + kd.m_is_reverse_cf ? slice1 : slice2); + + uint64_t sz = 0; + auto disk_size = kd.m_stats.m_actual_disk_size; + if (disk_size == 0) disk_size = kd.m_stats.m_data_size; + auto rows = kd.m_stats.m_rows; + if (rows == 0 || disk_size == 0) { + rows = 1; + disk_size = ROCKSDB_ASSUMED_KEY_VALUE_DISK_SIZE; + } + + // Getting statistics, including from Memtables + uint8_t include_flags = rocksdb::DB::INCLUDE_FILES; + rdb->GetApproximateSizes(kd.get_cf(), &r, 1, &sz, include_flags); + ret = rows * sz / disk_size; + uint64_t memTableCount; + rdb->GetApproximateMemTableStats(kd.get_cf(), r, &memTableCount, &sz); + ret += memTableCount; + + /* + GetApproximateSizes() gives estimates so ret might exceed stats.records. + MySQL then decides to use full index scan rather than range scan, which + is not efficient for most cases. + To prevent this, changing estimated records slightly smaller than + stats.records. + */ + if (ret >= stats.records) { + ret = stats.records * 0.99; + } + + if (rocksdb_debug_optimizer_n_rows > 0) { + ret = rocksdb_debug_optimizer_n_rows; + } else if (ret == 0) { + ret = 1; + } + + DBUG_RETURN(ret); +} + +void ha_rocksdb::update_create_info(HA_CREATE_INFO *const create_info) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(create_info != nullptr); + + if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) { + create_info->auto_increment_value = m_tbl_def->m_auto_incr_val; + } + + DBUG_VOID_RETURN; +} + +/** + @brief + Doing manual compaction on OPTIMIZE TABLE in RocksDB. 
+ Compaction itself is executed by background thread in RocksDB, but + CompactRange() waits until compaction completes so this function + may take a long time. + Since RocksDB dataset is allocated per index id, OPTIMIZE TABLE + triggers manual compaction for all indexes of the table. + @details + Compaction range is from the beginning of the index id to + the first row of the next index id. When using reverse order + column family, the first row of the next index id should be + the last row of the previous index id. + + @return + HA_ADMIN_OK OK + other HA_ADMIN error code +*/ +int ha_rocksdb::optimize(THD *const thd, HA_CHECK_OPT *const check_opt) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(thd != nullptr); + DBUG_ASSERT(check_opt != nullptr); + + for (uint i = 0; i < table->s->keys; i++) { + uchar buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]; + auto range = get_range(i, buf); + const rocksdb::Status s = rdb->CompactRange(getCompactRangeOptions(), + m_key_descr_arr[i]->get_cf(), + &range.start, &range.limit); + if (!s.ok()) { + DBUG_RETURN(rdb_error_to_mysql(s)); + } + } + + DBUG_RETURN(HA_EXIT_SUCCESS); +} + +static int calculate_stats( + const std::unordered_map> + &to_recalc, + bool include_memtables) { + DBUG_ENTER_FUNC(); + + // find per column family key ranges which need to be queried + std::unordered_map> + ranges; + std::unordered_map stats; + std::vector buf(to_recalc.size() * 2 * Rdb_key_def::INDEX_NUMBER_SIZE); + + uchar *bufp = buf.data(); + for (const auto &it : to_recalc) { + const GL_INDEX_ID index_id = it.first; + auto &kd = it.second; + ranges[kd->get_cf()].push_back(myrocks::get_range(*kd, bufp)); + bufp += 2 * Rdb_key_def::INDEX_NUMBER_SIZE; + + stats[index_id] = Rdb_index_stats(index_id); + DBUG_ASSERT(kd->get_key_parts() > 0); + stats[index_id].m_distinct_keys_per_prefix.resize(kd->get_key_parts()); + } + + // get RocksDB table properties for these ranges + rocksdb::TablePropertiesCollection props; + for (const auto &it : ranges) { + const auto old_size 
MY_ATTRIBUTE((__unused__)) = props.size(); + const auto status = rdb->GetPropertiesOfTablesInRange( + it.first, &it.second[0], it.second.size(), &props); + DBUG_ASSERT(props.size() >= old_size); + if (!status.ok()) { + DBUG_RETURN(ha_rocksdb::rdb_error_to_mysql( + status, "Could not access RocksDB properties")); + } + } + + int num_sst = 0; + for (const auto &it : props) { + std::vector sst_stats; + Rdb_tbl_prop_coll::read_stats_from_tbl_props(it.second, &sst_stats); + /* + sst_stats is a list of index statistics for indexes that have entries + in the current SST file. + */ + for (const auto &it1 : sst_stats) { + /* + Only update statistics for indexes that belong to this SQL table. + + The reason is: We are walking through all SST files that have + entries from this table (and so can compute good statistics). For + other SQL tables, it can be that we're only seeing a small fraction + of table's entries (and so we can't update statistics based on that). + */ + if (stats.find(it1.m_gl_index_id) == stats.end()) { + continue; + } + + auto it_index = to_recalc.find(it1.m_gl_index_id); + DBUG_ASSERT(it_index != to_recalc.end()); + if (it_index == to_recalc.end()) { + continue; + } + stats[it1.m_gl_index_id].merge( + it1, true, it_index->second->max_storage_fmt_length()); + } + num_sst++; + } + + if (include_memtables) { + // calculate memtable cardinality + Rdb_tbl_card_coll cardinality_collector(rocksdb_table_stats_sampling_pct); + auto read_opts = rocksdb::ReadOptions(); + read_opts.read_tier = rocksdb::ReadTier::kMemtableTier; + for (const auto &it_kd : to_recalc) { + const std::shared_ptr &kd = it_kd.second; + Rdb_index_stats &stat = stats[kd->get_gl_index_id()]; + + uchar r_buf[Rdb_key_def::INDEX_NUMBER_SIZE * 2]; + auto r = myrocks::get_range(*kd, r_buf); + uint64_t memtableCount; + uint64_t memtableSize; + rdb->GetApproximateMemTableStats(kd->get_cf(), r, &memtableCount, + &memtableSize); + if (memtableCount < (uint64_t)stat.m_rows / 10) { + // skip tables that 
already have enough stats from SST files to reduce + // overhead and avoid degradation of big tables stats by sampling from + // relatively tiny (less than 10% of full data set) memtable dataset + continue; + } + + std::unique_ptr it = + std::unique_ptr( + rdb->NewIterator(read_opts, kd->get_cf())); + + rocksdb::Slice first_index_key((const char *)r_buf, + Rdb_key_def::INDEX_NUMBER_SIZE); + + cardinality_collector.Reset(); + for (it->Seek(first_index_key); is_valid(it.get()); it->Next()) { + const rocksdb::Slice key = it->key(); + if (!kd->covers_key(key)) { + break; // end of this index + } + stat.m_rows++; + + cardinality_collector.ProcessKey(key, kd.get(), &stat); + } + cardinality_collector.AdjustStats(&stat); + } + } + + // set and persist new stats + ddl_manager.set_stats(stats); + ddl_manager.persist_stats(true); + + DBUG_RETURN(HA_EXIT_SUCCESS); +} + +int ha_rocksdb::calculate_stats_for_table() { + DBUG_ENTER_FUNC(); + + std::unordered_map> + ids_to_check; + for (uint i = 0; i < table->s->keys; i++) { + ids_to_check.insert(std::make_pair(m_key_descr_arr[i]->get_gl_index_id(), + m_key_descr_arr[i])); + } + + DBUG_RETURN(calculate_stats(ids_to_check, true)); +} + +/* + @return + HA_ADMIN_OK OK + other HA_ADMIN error code +*/ +int ha_rocksdb::analyze(THD *const thd, HA_CHECK_OPT *const check_opt) { + DBUG_ENTER_FUNC(); + + if (table) { + if (calculate_stats_for_table() != HA_EXIT_SUCCESS) { + DBUG_RETURN(HA_ADMIN_FAILED); + } + } + + // A call to ::info is needed to repopulate some SQL level structs. This is + // necessary for online analyze because we cannot rely on another ::open + // call to call info for us. 
+ if (info(HA_STATUS_CONST | HA_STATUS_VARIABLE) != HA_EXIT_SUCCESS) { + DBUG_RETURN(HA_ADMIN_FAILED); + } + + DBUG_RETURN(HA_ADMIN_OK); +} + +void ha_rocksdb::get_auto_increment(ulonglong off, ulonglong inc, + ulonglong nb_desired_values, + ulonglong *const first_value, + ulonglong *const nb_reserved_values) { + /* + MySQL has a somewhat complicated way of handling the auto-increment value. + The first time get_auto_increment is called for a statement, + nb_desired_values is the estimate for how many values will be needed. The + engine can then reserve some values, and those will be automatically used + by MySQL, until a hard-coded value shows up in the insert statement, after + which MySQL again calls this function to reset its starting value. + * + For simplicity we will just ignore nb_desired_values - we aren't going to + reserve any extra values for a multi-insert statement. Each row will + simply acquire the next value as needed and we will always tell MySQL that + we only reserved 1 value. Since we are using an atomic value for + m_auto_incr_val this should be safe - if we had to grab a mutex, doing + an actual reserve of some values might be a better solution. + */ + DEBUG_SYNC(ha_thd(), "rocksdb.autoinc_vars"); + DEBUG_SYNC(ha_thd(), "rocksdb.autoinc_vars2"); + + if (off > inc) { + off = 1; + } + + Field *field; + ulonglong new_val, max_val; + field = table->key_info[table->s->next_number_index].key_part[0].field; + max_val = rdb_get_int_col_max_value(field); + + // Local variable reference to simplify code below + auto &auto_incr = m_tbl_def->m_auto_incr_val; + + if (inc == 1) { + DBUG_ASSERT(off == 1); + // Optimization for the standard case where we are always simply + // incrementing from the last position + + // Use CAS operation in a loop to make sure automically get the next auto + // increment value while ensuring that we don't wrap around to a negative + // number. + // + // We set auto_incr to the min of max_val and new_val + 1. 
This means that + // if we're at the maximum, we should be returning the same value for + // multiple rows, resulting in duplicate key errors (as expected). + // + // If we return values greater than the max, the SQL layer will "truncate" + // the value anyway, but it means that we store invalid values into + // auto_incr that will be visible in SHOW CREATE TABLE. + new_val = auto_incr; + while (new_val != std::numeric_limits::max()) { + if (auto_incr.compare_exchange_weak(new_val, + std::min(new_val + 1, max_val))) { + break; + } + } + } else { + // The next value can be more complicated if either 'inc' or 'off' is not 1 + ulonglong last_val = auto_incr; + + if (last_val > max_val) { + new_val = std::numeric_limits::max(); + } else { + // Loop until we can correctly update the atomic value + do { + DBUG_ASSERT(last_val > 0); + // Calculate the next value in the auto increment series: offset + // + N * increment where N is 0, 1, 2, ... + // + // For further information please visit: + // http://dev.mysql.com/doc/refman/5.7/en/replication-options-master.html + // + // The following is confusing so here is an explanation: + // To get the next number in the sequence above you subtract out the + // offset, calculate the next sequence (N * increment) and then add the + // offset back in. + // + // The additions are rearranged to avoid overflow. The following is + // equivalent to (last_val - 1 + inc - off) / inc. This uses the fact + // that (a+b)/c = a/c + b/c + (a%c + b%c)/c. To show why: + // + // (a+b)/c + // = (a - a%c + a%c + b - b%c + b%c) / c + // = (a - a%c) / c + (b - b%c) / c + (a%c + b%c) / c + // = a/c + b/c + (a%c + b%c) / c + // + // Now, substitute a = last_val - 1, b = inc - off, c = inc to get the + // following statement. + ulonglong n = + (last_val - 1) / inc + ((last_val - 1) % inc + inc - off) / inc; + + // Check if n * inc + off will overflow. This can only happen if we have + // an UNSIGNED BIGINT field. 
+ if (n > (std::numeric_limits::max() - off) / inc) { + DBUG_ASSERT(max_val == std::numeric_limits::max()); + // The 'last_val' value is already equal to or larger than the largest + // value in the sequence. Continuing would wrap around (technically + // the behavior would be undefined). What should we do? + // We could: + // 1) set the new value to the last possible number in our sequence + // as described above. The problem with this is that this + // number could be smaller than a value in an existing row. + // 2) set the new value to the largest possible number. This number + // may not be in our sequence, but it is guaranteed to be equal + // to or larger than any other value already inserted. + // + // For now I'm going to take option 2. + // + // Returning ULLONG_MAX from get_auto_increment will cause the SQL + // layer to fail with ER_AUTOINC_READ_FAILED. This means that due to + // the SE API for get_auto_increment, inserts will fail with + // ER_AUTOINC_READ_FAILED if the column is UNSIGNED BIGINT, but + // inserts will fail with ER_DUP_ENTRY for other types (or no failure + // if the column is in a non-unique SK). + new_val = std::numeric_limits::max(); + auto_incr = new_val; // Store the largest value into auto_incr + break; + } + + new_val = n * inc + off; + + // Attempt to store the new value (plus 1 since m_auto_incr_val contains + // the next available value) into the atomic value. If the current + // value no longer matches what we have in 'last_val' this will fail and + // we will repeat the loop (`last_val` will automatically get updated + // with the current value). + // + // See above explanation for inc == 1 for why we use std::min. 
+ } while (!auto_incr.compare_exchange_weak( + last_val, std::min(new_val + 1, max_val))); + } + } + + *first_value = new_val; + *nb_reserved_values = 1; +} + +#ifndef DBUG_OFF + +/* Debugger help function */ +static char dbug_item_print_buf[512]; + +const char *dbug_print_item(Item *const item) { + char *const buf = dbug_item_print_buf; + String str(buf, sizeof(dbug_item_print_buf), &my_charset_bin); + str.length(0); + if (!item) return "(Item*)nullptr"; + item->print(&str, QT_ORDINARY); + if (str.c_ptr() == buf) { + return buf; + } else { + return "Couldn't fit into buffer"; + } +} + +#endif /*DBUG_OFF*/ + +/** + SQL layer calls this function to push an index condition. + + @details + The condition is for index keyno (only one condition can be pushed at a + time). + The caller guarantees that condition refers only to index fields; besides + that, fields must have + + $field->part_of_key.set_bit(keyno) + + which means that + + (handler->index_flags(keyno, $keypart, 0) & HA_KEYREAD_ONLY) == 1 + + which means that field value can be restored from the index tuple. + + @return + Part of condition we couldn't check (always nullptr). +*/ + +class Item *ha_rocksdb::idx_cond_push(uint keyno, class Item *const idx_cond) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(keyno != MAX_KEY); + DBUG_ASSERT(idx_cond != nullptr); + + pushed_idx_cond = idx_cond; + pushed_idx_cond_keyno = keyno; + in_range_check_pushed_down = TRUE; + + /* We will check the whole condition */ + DBUG_RETURN(nullptr); +} + +/* + Checks if inplace alter is supported for a given operation. 
+*/ + +my_core::enum_alter_inplace_result ha_rocksdb::check_if_supported_inplace_alter( + TABLE *altered_table, my_core::Alter_inplace_info *const ha_alter_info) { + DBUG_ENTER_FUNC(); + + DBUG_ASSERT(ha_alter_info != nullptr); + + if (ha_alter_info->handler_flags & + ~(ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX | + ALTER_DROP_UNIQUE_INDEX | + ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX | + ALTER_PARTITIONED | + ALTER_ADD_UNIQUE_INDEX | + ALTER_CHANGE_CREATE_OPTION)) { + DBUG_RETURN(my_core::HA_ALTER_INPLACE_NOT_SUPPORTED); + } + + /* We don't support unique keys on table w/ no primary keys */ + if ((ha_alter_info->handler_flags & + ALTER_ADD_UNIQUE_INDEX) && + has_hidden_pk(altered_table)) { + DBUG_RETURN(my_core::HA_ALTER_INPLACE_NOT_SUPPORTED); + } + + /* We only support changing auto_increment for table options. */ + if ((ha_alter_info->handler_flags & ALTER_CHANGE_CREATE_OPTION) && + !(ha_alter_info->create_info->used_fields & HA_CREATE_USED_AUTO)) { + DBUG_RETURN(my_core::HA_ALTER_INPLACE_NOT_SUPPORTED); + } + + /* FIXME: MDEV-16099 Use alter algorithm=nocopy or algorithm=instant + for non-InnoDB engine */ + DBUG_RETURN(my_core::HA_ALTER_INPLACE_COPY_LOCK); +} + +/** + Allows the storage engine to update internal structures with concurrent + writes blocked. If check_if_supported_inplace_alter() returns + HA_ALTER_INPLACE_COPY_NO_LOCK or HA_ALTER_INPLACE_COPY_LOCK. + this function is called with exclusive lock otherwise the same level of + locking as for inplace_alter_table() will be used. + + @note Storage engines are responsible for reporting any errors by + calling my_error()/print_error() + + @note If this function reports error, commit_inplace_alter_table() + will be called with commit= false. + + @note For partitioning, failing to prepare one partition, means that + commit_inplace_alter_table() will be called to roll back changes for + all partitions. 
+  This means that commit_inplace_alter_table() might be
+  called without prepare_inplace_alter_table() having been called first
+  for a given partition.
+
+  In this engine the function builds a new Rdb_tbl_def carrying the key
+  descriptors of the altered table, works out which indexes are really
+  dropped and which are really added (an index that is dropped and re-added
+  unchanged counts as neither), optionally reads the current maximum
+  auto-increment value, and stores all of it in a Rdb_inplace_alter_ctx
+  attached to ha_alter_info->handler_ctx.
+
+  @param    altered_table     TABLE object for new version of table.
+  @param    ha_alter_info     Structure describing changes to be done
+  by ALTER TABLE and holding data used
+  during in-place alter.
+
+  @retval   true              Error
+  @retval   false             Success
+*/
+bool ha_rocksdb::prepare_inplace_alter_table(
+    TABLE *const altered_table,
+    my_core::Alter_inplace_info *const ha_alter_info) {
+  DBUG_ENTER_FUNC();
+
+  DBUG_ASSERT(altered_table != nullptr);
+  DBUG_ASSERT(ha_alter_info != nullptr);
+
+  /*
+    NOTE(review): the template argument lists of the std::shared_ptr and
+    std::unordered_set declarations below are missing in this copy of the
+    patch - confirm against the upstream file before applying.
+  */
+  Rdb_tbl_def *new_tdef = nullptr;
+  std::shared_ptr *old_key_descr = nullptr;
+  std::shared_ptr *new_key_descr = nullptr;
+  uint old_n_keys = m_tbl_def->m_key_count;
+  uint new_n_keys = altered_table->s->keys;
+  std::unordered_set> added_indexes;
+  std::unordered_set dropped_index_ids;
+  uint n_dropped_keys = 0;
+  uint n_added_keys = 0;
+  ulonglong max_auto_incr = 0;
+
+  /* Index add/drop: build the new table definition and diff the key sets */
+  if (ha_alter_info->handler_flags &
+      (ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX |
+       ALTER_DROP_UNIQUE_INDEX |
+       ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX |
+       ALTER_ADD_UNIQUE_INDEX)) {
+    /* A hidden primary key occupies one extra key descriptor slot */
+    if (has_hidden_pk(altered_table)) {
+      new_n_keys += 1;
+    }
+
+    const TABLE *const old_table = table;
+    old_key_descr = m_tbl_def->m_key_descr_arr;
+    new_key_descr = new std::shared_ptr[new_n_keys];
+
+    /* The new definition keeps the table name and the current counters */
+    new_tdef = new Rdb_tbl_def(m_tbl_def->full_tablename());
+    new_tdef->m_key_descr_arr = new_key_descr;
+    new_tdef->m_key_count = new_n_keys;
+    new_tdef->m_auto_incr_val =
+        m_tbl_def->m_auto_incr_val.load(std::memory_order_relaxed);
+    new_tdef->m_hidden_pk_val =
+        m_tbl_def->m_hidden_pk_val.load(std::memory_order_relaxed);
+
+    if (create_key_defs(altered_table, new_tdef, table, m_tbl_def)) {
+      /* Delete the new key descriptors */
+      delete[] new_key_descr;
+
+      /*
+        Explicitly mark as nullptr so we don't accidentally remove entries
+        from data dictionary on cleanup (or cause double delete[]).
+      */
+      new_tdef->m_key_descr_arr = nullptr;
+      delete new_tdef;
+
+      my_error(ER_KEY_CREATE_DURING_ALTER, MYF(0));
+      DBUG_RETURN(HA_EXIT_FAILURE);
+    }
+
+    uint i;
+    uint j;
+
+    /* Determine which (if any) key definition(s) need to be dropped */
+    for (i = 0; i < ha_alter_info->index_drop_count; i++) {
+      const KEY *const dropped_key = ha_alter_info->index_drop_buffer[i];
+      for (j = 0; j < old_n_keys; j++) {
+        const KEY *const old_key =
+            &old_table->key_info[old_key_descr[j]->get_keyno()];
+
+        /* compare_keys() yields zero for a match */
+        if (!compare_keys(old_key, dropped_key)) {
+          dropped_index_ids.insert(old_key_descr[j]->get_gl_index_id());
+          break;
+        }
+      }
+    }
+
+    /* Determine which (if any) key definition(s) need to be added */
+    int identical_indexes_found = 0;
+    for (i = 0; i < ha_alter_info->index_add_count; i++) {
+      const KEY *const added_key =
+          &ha_alter_info->key_info_buffer[ha_alter_info->index_add_buffer[i]];
+      for (j = 0; j < new_n_keys; j++) {
+        const KEY *const new_key =
+            &altered_table->key_info[new_key_descr[j]->get_keyno()];
+        if (!compare_keys(new_key, added_key)) {
+          /*
+            Check for cases where an 'identical' index is being dropped and
+            re-added in a single ALTER statement.  Turn this into a no-op as the
+            index has not changed.
+
+            E.G. Unique index -> non-unique index requires no change
+
+            Note that cases where the index name remains the same but the
+            key-parts are changed is already handled in create_inplace_key_defs.
+            In these cases the index needs to be rebuilt.
+          */
+          if (dropped_index_ids.count(new_key_descr[j]->get_gl_index_id())) {
+            dropped_index_ids.erase(new_key_descr[j]->get_gl_index_id());
+            identical_indexes_found++;
+          } else {
+            added_indexes.insert(new_key_descr[j]);
+          }
+
+          break;
+        }
+      }
+    }
+
+    /* Indexes that were dropped and re-added unchanged count as neither */
+    n_dropped_keys = ha_alter_info->index_drop_count - identical_indexes_found;
+    n_added_keys = ha_alter_info->index_add_count - identical_indexes_found;
+    DBUG_ASSERT(dropped_index_ids.size() == n_dropped_keys);
+    DBUG_ASSERT(added_indexes.size() == n_added_keys);
+    DBUG_ASSERT(new_n_keys == (old_n_keys - n_dropped_keys + n_added_keys));
+  }
+  if (ha_alter_info->handler_flags & ALTER_CHANGE_CREATE_OPTION) {
+    /* Without an index change the existing definition is reused as-is */
+    if (!new_tdef) {
+      new_tdef = m_tbl_def;
+    }
+    if (table->found_next_number_field) {
+      max_auto_incr = load_auto_incr_value_from_index();
+    }
+  }
+
+  /* Hand everything gathered above to the later in-place alter phases */
+  ha_alter_info->handler_ctx = new Rdb_inplace_alter_ctx(
+      new_tdef, old_key_descr, new_key_descr, old_n_keys, new_n_keys,
+      added_indexes, dropped_index_ids, n_added_keys, n_dropped_keys,
+      max_auto_incr);
+  DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+/**
+  Alter the table structure in-place with operations specified using
+  HA_ALTER_FLAGS and Alter_inplace_info. The level of concurrency allowed
+  during this operation depends on the return value from
+  check_if_supported_inplace_alter().
+
+  @note Storage engines are responsible for reporting any errors by
+  calling my_error()/print_error()
+
+  @note If this function reports error, commit_inplace_alter_table()
+  will be called with commit= false.
+
+  @param    altered_table     TABLE object for new version of table.
+  @param    ha_alter_info     Structure describing changes to be done
+  by ALTER TABLE and holding data used
+  during in-place alter.
+
+  @retval   true              Error
+  @retval   false             Success
+*/
+bool ha_rocksdb::inplace_alter_table(
+    TABLE *const altered_table,
+    my_core::Alter_inplace_info *const ha_alter_info) {
+  DBUG_ENTER_FUNC();
+
+  DBUG_ASSERT(altered_table != nullptr);
+  DBUG_ASSERT(ha_alter_info != nullptr);
+  DBUG_ASSERT(ha_alter_info->handler_ctx != nullptr);
+
+  /*
+    Context stored in handler_ctx by prepare_inplace_alter_table().
+    NOTE(review): the target type of the static_cast below is missing in this
+    copy of the patch - confirm against the upstream file.
+  */
+  Rdb_inplace_alter_ctx *const ctx =
+      static_cast(ha_alter_info->handler_ctx);
+
+  /* Only index additions need work here; all other flags fall through */
+  if (ha_alter_info->handler_flags &
+      (ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX |
+       ALTER_ADD_UNIQUE_INDEX)) {
+    /*
+      Buffers need to be set up again to account for new, possibly longer
+      secondary keys.
+    */
+    free_key_buffers();
+
+    DBUG_ASSERT(ctx != nullptr);
+
+    /*
+      If adding unique index, allocate special buffers for duplicate checking.
+    */
+    int err;
+    if ((err = alloc_key_buffers(
+             altered_table, ctx->m_new_tdef,
+             ha_alter_info->handler_flags &
+                 ALTER_ADD_UNIQUE_INDEX))) {
+      my_error(ER_OUT_OF_RESOURCES, MYF(0));
+      DBUG_RETURN(err);
+    }
+
+    /* Populate all new secondary keys by scanning the primary key. */
+    if ((err = inplace_populate_sk(altered_table, ctx->m_added_indexes))) {
+      my_error(ER_SK_POPULATE_DURING_ALTER, MYF(0));
+      DBUG_RETURN(HA_EXIT_FAILURE);
+    }
+  }
+
+  /* Debug-only hook that forces this phase to report a failure */
+  DBUG_EXECUTE_IF("myrocks_simulate_index_create_rollback", {
+    dbug_create_err_inplace_alter();
+    DBUG_RETURN(HA_EXIT_FAILURE);
+  };);
+
+  DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+/**
+ Scan the Primary Key index entries and populate the new secondary keys.
+*/ +int ha_rocksdb::inplace_populate_sk( + TABLE *const new_table_arg, + const std::unordered_set> &indexes) { + DBUG_ENTER_FUNC(); + int res = HA_EXIT_SUCCESS; + const std::unique_ptr wb = dict_manager.begin(); + rocksdb::WriteBatch *const batch = wb.get(); + + /* Update the data dictionary */ + std::unordered_set create_index_ids; + for (const auto &index : indexes) { + create_index_ids.insert(index->get_gl_index_id()); + } + dict_manager.add_create_index(create_index_ids, batch); + res = dict_manager.commit(batch); + if (res != HA_EXIT_SUCCESS) { + return res; + } + + /* + Add uncommitted key definitons to ddl_manager. We need to do this + so that the property collector can find this keydef when it needs to + update stats. The property collector looks for the keydef in the + data dictionary, but it won't be there yet since this key definition + is still in the creation process. + */ + ddl_manager.add_uncommitted_keydefs(indexes); + + const bool hidden_pk_exists = has_hidden_pk(table); + + Rdb_transaction *tx = get_or_create_tx(table->in_use); + + /* + There is one specific scenario where m_sst_info may not be nullptr. This + happens if the handler we're using happens to be the handler where the PK + bulk load was done on. The sequence of events that lead to this is as + follows (T1 is PK bulk load, T2 is SK alter table): + + T1: Execute last INSERT statement + T1: Return TABLE and handler object back to Table_cache_manager + T1: Close connection + T2: Execute ALTER statement + T2: Take same TABLE/handler from Table_cache_manager + T2: Call closefrm which will call finalize_bulk_load on every other open + table/handler *except* the one it's on. + T2: Acquire stale snapshot of PK + T1: Call finalize_bulk_load + + This is rare because usually, closefrm will call the destructor (and thus + finalize_bulk_load) on the handler where PK bulk load is done. 
However, if + the thread ids of the bulk load thread and the alter thread differ by a + multiple of table_cache_instances (8 by default), then they hash to the + same bucket in Table_cache_manager and the alter thread will not not call + the destructor on the handler it is holding. Thus, its m_sst_info will not + be nullptr. + + At this point, it is safe to refresh the snapshot because we know all other + open handlers have been closed at this point, and the one we're on is the + only one left. + */ + if (m_sst_info) { + if ((res = finalize_bulk_load())) { + DBUG_RETURN(res); + } + tx->commit(); + } + + const ulonglong rdb_merge_buf_size = THDVAR(ha_thd(), merge_buf_size); + const ulonglong rdb_merge_combine_read_size = + THDVAR(ha_thd(), merge_combine_read_size); + const ulonglong rdb_merge_tmp_file_removal_delay = + THDVAR(ha_thd(), merge_tmp_file_removal_delay_ms); + + for (const auto &index : indexes) { + bool is_unique_index = + new_table_arg->key_info[index->get_keyno()].flags & HA_NOSAME; + + Rdb_index_merge rdb_merge(tx->get_rocksdb_tmpdir(), rdb_merge_buf_size, + rdb_merge_combine_read_size, + rdb_merge_tmp_file_removal_delay, + index->get_cf()); + + if ((res = rdb_merge.init())) { + DBUG_RETURN(res); + } + + /* + Note: We pass in the currently existing table + tbl_def object here, + as the pk index position may have changed in the case of hidden primary + keys. 
+ */ + const uint pk = pk_index(table, m_tbl_def); + ha_index_init(pk, true); + + /* Scan each record in the primary key in order */ + for (res = index_first(table->record[0]); res == 0; + res = index_next(table->record[0])) { + longlong hidden_pk_id = 0; + if (hidden_pk_exists && + (res = read_hidden_pk_id_from_rowkey(&hidden_pk_id))) { + // NO_LINT_DEBUG + sql_print_error("Error retrieving hidden pk id."); + ha_index_end(); + DBUG_RETURN(res); + } + + /* Create new secondary index entry */ + const int new_packed_size = index->pack_record( + new_table_arg, m_pack_buffer, table->record[0], m_sk_packed_tuple, + &m_sk_tails, should_store_row_debug_checksums(), hidden_pk_id, 0, + nullptr, m_ttl_bytes); + + const rocksdb::Slice key = rocksdb::Slice( + reinterpret_cast(m_sk_packed_tuple), new_packed_size); + const rocksdb::Slice val = + rocksdb::Slice(reinterpret_cast(m_sk_tails.ptr()), + m_sk_tails.get_current_pos()); + + /* + Add record to offset tree in preparation for writing out to + disk in sorted chunks. + */ + if ((res = rdb_merge.add(key, val))) { + ha_index_end(); + DBUG_RETURN(res); + } + } + + if (res != HA_ERR_END_OF_FILE) { + // NO_LINT_DEBUG + sql_print_error("Error retrieving index entry from primary key."); + ha_index_end(); + DBUG_RETURN(res); + } + + ha_index_end(); + + /* + Perform an n-way merge of n sorted buffers on disk, then writes all + results to RocksDB via SSTFileWriter API. + */ + rocksdb::Slice merge_key; + rocksdb::Slice merge_val; + + struct unique_sk_buf_info sk_info; + sk_info.dup_sk_buf = m_dup_sk_packed_tuple; + sk_info.dup_sk_buf_old = m_dup_sk_packed_tuple_old; + + while ((res = rdb_merge.next(&merge_key, &merge_val)) == 0) { + /* Perform uniqueness check if needed */ + if (is_unique_index) { + if (check_duplicate_sk(new_table_arg, *index, &merge_key, &sk_info)) { + /* + Duplicate entry found when trying to create unique secondary key. 
+ We need to unpack the record into new_table_arg->record[0] as it + is used inside print_keydup_error so that the error message shows + the duplicate record. + */ + if (index->unpack_record( + new_table_arg, new_table_arg->record[0], &merge_key, + &merge_val, m_converter->get_verify_row_debug_checksums())) { + /* Should never reach here */ + DBUG_ASSERT(0); + } + + print_keydup_error(new_table_arg, + &new_table_arg->key_info[index->get_keyno()], + MYF(0)); + DBUG_RETURN(ER_DUP_ENTRY); + } + } + + /* + Insert key and slice to SST via SSTFileWriter API. + */ + if ((res = bulk_load_key(tx, *index, merge_key, merge_val, false))) { + break; + } + } + + /* + Here, res == -1 means that we are finished, while > 0 means an error + occurred. + */ + if (res > 0) { + // NO_LINT_DEBUG + sql_print_error("Error while bulk loading keys in external merge sort."); + DBUG_RETURN(res); + } + + bool is_critical_error; + res = tx->finish_bulk_load(&is_critical_error); + if (res && is_critical_error) { + // NO_LINT_DEBUG + sql_print_error("Error finishing bulk load."); + DBUG_RETURN(res); + } + } + + /* + Explicitly tell jemalloc to clean up any unused dirty pages at this point. + See https://reviews.facebook.net/D63723 for more details. + */ + purge_all_jemalloc_arenas(); + + DBUG_EXECUTE_IF("crash_during_online_index_creation", DBUG_SUICIDE();); + DBUG_RETURN(res); +} + +/** + Commit or rollback the changes made during prepare_inplace_alter_table() + and inplace_alter_table() inside the storage engine. + Note that in case of rollback the allowed level of concurrency during + this operation will be the same as for inplace_alter_table() and thus + might be higher than during prepare_inplace_alter_table(). (For example, + concurrent writes were blocked during prepare, but might not be during + rollback). 
+
+  @note Storage engines are responsible for reporting any errors by
+  calling my_error()/print_error()
+
+  @note If this function with commit= true reports error, it will be called
+  again with commit= false.
+
+  @note In case of partitioning, this function might be called for rollback
+  without prepare_inplace_alter_table() having been called first.
+  Also partitioned tables sets ha_alter_info->group_commit_ctx to a NULL
+  terminated array of the partitions handlers and if all of them are
+  committed as one, then group_commit_ctx should be set to NULL to indicate
+  to the partitioning handler that all partitions handlers are committed.
+  @see prepare_inplace_alter_table().
+
+  @param    altered_table     TABLE object for new version of table.
+  @param    ha_alter_info     Structure describing changes to be done
+  by ALTER TABLE and holding data used
+  during in-place alter.
+  @param    commit            True => Commit, False => Rollback.
+
+  @retval   true              Error
+  @retval   false             Success
+*/
+bool ha_rocksdb::commit_inplace_alter_table(
+    my_core::TABLE *const altered_table,
+    my_core::Alter_inplace_info *const ha_alter_info, bool commit) {
+  DBUG_ENTER_FUNC();
+
+  DBUG_ASSERT(altered_table != nullptr);
+  DBUG_ASSERT(ha_alter_info != nullptr);
+
+  /*
+    NOTE(review): the target type of this static_cast (and of the other
+    casts and std:: templates in this function) is missing in this copy of
+    the patch - confirm against the upstream file.
+  */
+  Rdb_inplace_alter_ctx *const ctx0 =
+      static_cast(ha_alter_info->handler_ctx);
+
+  DEBUG_SYNC(ha_thd(), "rocksdb.commit_in_place_alter_table");
+
+  /*
+    IMPORTANT: When rollback is requested, mysql will abort with
+    an assertion failure. That means every failed commit during inplace alter
+    table will result in a fatal error on the server. Indexes ongoing creation
+    will be detected when the server restarts, and dropped.
+
+    For partitioned tables, a rollback call to this function (commit == false)
+    is done for each partition.  A successful commit call only executes once
+    for all partitions.
+  */
+  if (!commit) {
+    /* If ctx has not been created yet, nothing to do here */
+    if (!ctx0) {
+      DBUG_RETURN(HA_EXIT_SUCCESS);
+    }
+
+    /*
+      Cannot call destructor for Rdb_tbl_def directly because we don't want to
+      erase the mappings inside the ddl_manager, as the old_key_descr is still
+      using them.
+    */
+    if (ctx0->m_new_key_descr) {
+      /* Delete the new key descriptors */
+      for (uint i = 0; i < ctx0->m_new_tdef->m_key_count; i++) {
+        ctx0->m_new_key_descr[i] = nullptr;
+      }
+
+      delete[] ctx0->m_new_key_descr;
+      ctx0->m_new_key_descr = nullptr;
+      ctx0->m_new_tdef->m_key_descr_arr = nullptr;
+
+      delete ctx0->m_new_tdef;
+    }
+
+    /* Remove uncommitted key definitions from ddl_manager */
+    ddl_manager.remove_uncommitted_keydefs(ctx0->m_added_indexes);
+
+    /* Rollback any partially created indexes */
+    dict_manager.rollback_ongoing_index_creation();
+
+    DBUG_RETURN(HA_EXIT_SUCCESS);
+  }
+
+  DBUG_ASSERT(ctx0);
+
+  /*
+    For partitioned tables, we need to commit all changes to all tables at
+    once, unlike in the other inplace alter API methods.
+  */
+  inplace_alter_handler_ctx **ctx_array;
+  inplace_alter_handler_ctx *ctx_single[2];
+
+  if (ha_alter_info->group_commit_ctx) {
+    DBUG_EXECUTE_IF("crash_during_index_creation_partition", DBUG_SUICIDE(););
+    ctx_array = ha_alter_info->group_commit_ctx;
+  } else {
+    /* Single table: wrap our own context in a NULL-terminated array */
+    ctx_single[0] = ctx0;
+    ctx_single[1] = nullptr;
+    ctx_array = ctx_single;
+  }
+
+  DBUG_ASSERT(ctx0 == ctx_array[0]);
+  /* Signal that every context in the group is handled by this call */
+  ha_alter_info->group_commit_ctx = nullptr;
+
+  if (ha_alter_info->handler_flags &
+      (ALTER_DROP_NON_UNIQUE_NON_PRIM_INDEX |
+       ALTER_DROP_UNIQUE_INDEX |
+       ALTER_ADD_NON_UNIQUE_NON_PRIM_INDEX |
+       ALTER_ADD_UNIQUE_INDEX)) {
+    const std::unique_ptr wb = dict_manager.begin();
+    rocksdb::WriteBatch *const batch = wb.get();
+    std::unordered_set create_index_ids;
+
+    /* Switch this handler over to the new table definition */
+    m_tbl_def = ctx0->m_new_tdef;
+    m_key_descr_arr = m_tbl_def->m_key_descr_arr;
+    m_pk_descr = m_key_descr_arr[pk_index(altered_table, m_tbl_def)];
+
+    dict_manager.lock();
+    for (inplace_alter_handler_ctx **pctx = ctx_array; *pctx; pctx++) {
+      Rdb_inplace_alter_ctx *const ctx =
+          static_cast(*pctx);
+
+      /* Mark indexes to be dropped */
+      dict_manager.add_drop_index(ctx->m_dropped_index_ids, batch);
+
+      for (const auto &index : ctx->m_added_indexes) {
+        create_index_ids.insert(index->get_gl_index_id());
+      }
+
+      if (ddl_manager.put_and_write(ctx->m_new_tdef, batch)) {
+        /*
+          Failed to write new entry into data dictionary, this should never
+          happen.
+        */
+        DBUG_ASSERT(0);
+      }
+
+      /*
+        Remove uncommitted key definitions from ddl_manager, as they are now
+        committed into the data dictionary.
+      */
+      ddl_manager.remove_uncommitted_keydefs(ctx->m_added_indexes);
+    }
+
+    if (dict_manager.commit(batch)) {
+      /*
+        Should never reach here. We assume MyRocks will abort if commit fails.
+      */
+      DBUG_ASSERT(0);
+    }
+
+    dict_manager.unlock();
+
+    /* Mark ongoing create indexes as finished/remove from data dictionary */
+    dict_manager.finish_indexes_operation(
+        create_index_ids, Rdb_key_def::DDL_CREATE_INDEX_ONGOING);
+
+    rdb_drop_idx_thread.signal();
+  }
+
+  if (ha_alter_info->handler_flags & ALTER_CHANGE_CREATE_OPTION) {
+    const std::unique_ptr wb = dict_manager.begin();
+    rocksdb::WriteBatch *const batch = wb.get();
+    /* NOTE(review): create_index_ids is never used in this branch */
+    std::unordered_set create_index_ids;
+
+    ulonglong auto_incr_val = ha_alter_info->create_info->auto_increment_value;
+
+    for (inplace_alter_handler_ctx **pctx = ctx_array; *pctx; pctx++) {
+      Rdb_inplace_alter_ctx *const ctx =
+          static_cast(*pctx);
+      /* Never go below the maximum recorded during the prepare phase */
+      auto_incr_val = std::max(auto_incr_val, ctx->m_max_auto_incr);
+      dict_manager.put_auto_incr_val(
+          batch, ctx->m_new_tdef->get_autoincr_gl_index_id(), auto_incr_val,
+          true /* overwrite */);
+      ctx->m_new_tdef->m_auto_incr_val = auto_incr_val;
+    }
+
+    if (dict_manager.commit(batch)) {
+      DBUG_ASSERT(0);
+    }
+  }
+
+  DBUG_RETURN(HA_EXIT_SUCCESS);
+}
+
+/* Name of the generated status callback for counter `name` */
+#define SHOW_FNAME(name) rocksdb_show_##name
+
+/*
+  Defines the status callback for counter `name`: it copies the RocksDB
+  ticker `key` into rocksdb_status_counters.name and exposes that field as a
+  SHOW_LONGLONG value.
+*/
+#define DEF_SHOW_FUNC(name, key)                                           \
+  static int SHOW_FNAME(name)(MYSQL_THD thd, SHOW_VAR * var, char *buff) { \
+    rocksdb_status_counters.name =                                         \
+        rocksdb_stats->getTickerCount(rocksdb::key);                       \
+    var->type = SHOW_LONGLONG;                                             \
+    var->value = reinterpret_cast(&rocksdb_status_counters.name);         \
+    return HA_EXIT_SUCCESS;                                                \
+  }
+
+/* SHOW_VAR entry "rocksdb_<name>" served by the generated callback */
+#define DEF_STATUS_VAR(name) \
+  { "rocksdb_" #name, (char *)&SHOW_FNAME(name), SHOW_FUNC }
+
+/* SHOW_VAR entry "rocksdb_<name>" that points directly at a variable */
+#define DEF_STATUS_VAR_PTR(name, ptr, option) \
+  { "rocksdb_" name, (char *)ptr, option }
+
+/* SHOW_VAR entry with an unprefixed name, used in the nested arrays */
+#define DEF_STATUS_VAR_FUNC(name, ptr, option) \
+  { name, reinterpret_cast(ptr), option }
+
+/* One slot per RocksDB ticker exported through DEF_SHOW_FUNC */
+struct rocksdb_status_counters_t {
+  uint64_t block_cache_miss;
+  uint64_t block_cache_hit;
+  uint64_t block_cache_add;
+  uint64_t block_cache_add_failures;
+  uint64_t block_cache_index_miss;
+  uint64_t block_cache_index_hit;
+  uint64_t block_cache_index_add;
+  uint64_t block_cache_index_bytes_insert;
+  uint64_t block_cache_index_bytes_evict;
+  uint64_t block_cache_filter_miss;
+  uint64_t block_cache_filter_hit;
+  uint64_t block_cache_filter_add;
+  uint64_t block_cache_filter_bytes_insert;
+  uint64_t block_cache_filter_bytes_evict;
+  uint64_t block_cache_bytes_read;
+  uint64_t block_cache_bytes_write;
+  uint64_t block_cache_data_bytes_insert;
+  uint64_t block_cache_data_miss;
+  uint64_t block_cache_data_hit;
+  uint64_t block_cache_data_add;
+  uint64_t bloom_filter_useful;
+  uint64_t bloom_filter_full_positive;
+  uint64_t bloom_filter_full_true_positive;
+  uint64_t memtable_hit;
+  uint64_t memtable_miss;
+  uint64_t get_hit_l0;
+  uint64_t get_hit_l1;
+  uint64_t get_hit_l2_and_up;
+  uint64_t compaction_key_drop_new;
+  uint64_t compaction_key_drop_obsolete;
+  uint64_t compaction_key_drop_user;
+  uint64_t number_keys_written;
+  uint64_t number_keys_read;
+  uint64_t number_keys_updated;
+  uint64_t bytes_written;
+  uint64_t bytes_read;
+  uint64_t number_db_seek;
+  uint64_t number_db_seek_found;
+  uint64_t number_db_next;
+  uint64_t number_db_next_found;
+  uint64_t number_db_prev;
+  uint64_t number_db_prev_found;
+  uint64_t iter_bytes_read;
+  uint64_t no_file_closes;
+  uint64_t no_file_opens;
+  uint64_t no_file_errors;
+  uint64_t stall_micros;
+  uint64_t num_iterators;
+  uint64_t number_multiget_get;
+  uint64_t number_multiget_keys_read;
+  uint64_t number_multiget_bytes_read;
+  uint64_t number_deletes_filtered;
+  uint64_t number_merge_failures;
+  uint64_t bloom_filter_prefix_checked;
+  uint64_t bloom_filter_prefix_useful;
+  uint64_t number_reseeks_iteration;
+  uint64_t getupdatessince_calls;
+  uint64_t block_cachecompressed_miss;
+  uint64_t block_cachecompressed_hit;
+  uint64_t wal_synced;
+  uint64_t wal_bytes;
+  uint64_t write_self;
+  uint64_t write_other;
+  uint64_t write_timedout;
+  uint64_t write_wal;
+  uint64_t flush_write_bytes;
+  uint64_t compact_read_bytes;
+  uint64_t compact_write_bytes;
+  uint64_t number_superversion_acquires;
+  uint64_t number_superversion_releases;
+  uint64_t number_superversion_cleanups;
+  uint64_t number_block_not_compressed;
+};
+
+static rocksdb_status_counters_t rocksdb_status_counters;
+
+/* Status callbacks: (counter field, RocksDB ticker it is read from) */
+DEF_SHOW_FUNC(block_cache_miss, BLOCK_CACHE_MISS)
+DEF_SHOW_FUNC(block_cache_hit, BLOCK_CACHE_HIT)
+DEF_SHOW_FUNC(block_cache_add, BLOCK_CACHE_ADD)
+DEF_SHOW_FUNC(block_cache_add_failures, BLOCK_CACHE_ADD_FAILURES)
+DEF_SHOW_FUNC(block_cache_index_miss, BLOCK_CACHE_INDEX_MISS)
+DEF_SHOW_FUNC(block_cache_index_hit, BLOCK_CACHE_INDEX_HIT)
+DEF_SHOW_FUNC(block_cache_index_add, BLOCK_CACHE_INDEX_ADD)
+DEF_SHOW_FUNC(block_cache_index_bytes_insert, BLOCK_CACHE_INDEX_BYTES_INSERT)
+DEF_SHOW_FUNC(block_cache_index_bytes_evict, BLOCK_CACHE_INDEX_BYTES_EVICT)
+DEF_SHOW_FUNC(block_cache_filter_miss, BLOCK_CACHE_FILTER_MISS)
+DEF_SHOW_FUNC(block_cache_filter_hit, BLOCK_CACHE_FILTER_HIT)
+DEF_SHOW_FUNC(block_cache_filter_add, BLOCK_CACHE_FILTER_ADD)
+DEF_SHOW_FUNC(block_cache_filter_bytes_insert, BLOCK_CACHE_FILTER_BYTES_INSERT)
+DEF_SHOW_FUNC(block_cache_filter_bytes_evict, BLOCK_CACHE_FILTER_BYTES_EVICT)
+DEF_SHOW_FUNC(block_cache_bytes_read, BLOCK_CACHE_BYTES_READ)
+DEF_SHOW_FUNC(block_cache_bytes_write, BLOCK_CACHE_BYTES_WRITE)
+DEF_SHOW_FUNC(block_cache_data_bytes_insert, BLOCK_CACHE_DATA_BYTES_INSERT)
+DEF_SHOW_FUNC(block_cache_data_miss, BLOCK_CACHE_DATA_MISS)
+DEF_SHOW_FUNC(block_cache_data_hit, BLOCK_CACHE_DATA_HIT)
+DEF_SHOW_FUNC(block_cache_data_add, BLOCK_CACHE_DATA_ADD)
+DEF_SHOW_FUNC(bloom_filter_useful, BLOOM_FILTER_USEFUL)
+DEF_SHOW_FUNC(bloom_filter_full_positive, BLOOM_FILTER_FULL_POSITIVE)
+DEF_SHOW_FUNC(bloom_filter_full_true_positive, BLOOM_FILTER_FULL_TRUE_POSITIVE)
+DEF_SHOW_FUNC(memtable_hit, MEMTABLE_HIT)
+DEF_SHOW_FUNC(memtable_miss, MEMTABLE_MISS)
+DEF_SHOW_FUNC(get_hit_l0, GET_HIT_L0)
+DEF_SHOW_FUNC(get_hit_l1, GET_HIT_L1)
+DEF_SHOW_FUNC(get_hit_l2_and_up, GET_HIT_L2_AND_UP)
+DEF_SHOW_FUNC(compaction_key_drop_new, COMPACTION_KEY_DROP_NEWER_ENTRY)
+DEF_SHOW_FUNC(compaction_key_drop_obsolete, COMPACTION_KEY_DROP_OBSOLETE)
+DEF_SHOW_FUNC(compaction_key_drop_user, COMPACTION_KEY_DROP_USER)
+DEF_SHOW_FUNC(number_keys_written, NUMBER_KEYS_WRITTEN)
+DEF_SHOW_FUNC(number_keys_read, NUMBER_KEYS_READ)
+DEF_SHOW_FUNC(number_keys_updated, NUMBER_KEYS_UPDATED)
+DEF_SHOW_FUNC(bytes_written, BYTES_WRITTEN)
+DEF_SHOW_FUNC(bytes_read, BYTES_READ)
+DEF_SHOW_FUNC(number_db_seek, NUMBER_DB_SEEK)
+DEF_SHOW_FUNC(number_db_seek_found, NUMBER_DB_SEEK_FOUND)
+DEF_SHOW_FUNC(number_db_next, NUMBER_DB_NEXT)
+DEF_SHOW_FUNC(number_db_next_found, NUMBER_DB_NEXT_FOUND)
+DEF_SHOW_FUNC(number_db_prev, NUMBER_DB_PREV)
+DEF_SHOW_FUNC(number_db_prev_found, NUMBER_DB_PREV_FOUND)
+DEF_SHOW_FUNC(iter_bytes_read, ITER_BYTES_READ)
+DEF_SHOW_FUNC(no_file_closes, NO_FILE_CLOSES)
+DEF_SHOW_FUNC(no_file_opens, NO_FILE_OPENS)
+DEF_SHOW_FUNC(no_file_errors, NO_FILE_ERRORS)
+DEF_SHOW_FUNC(stall_micros, STALL_MICROS)
+DEF_SHOW_FUNC(num_iterators, NO_ITERATORS)
+DEF_SHOW_FUNC(number_multiget_get, NUMBER_MULTIGET_CALLS)
+DEF_SHOW_FUNC(number_multiget_keys_read, NUMBER_MULTIGET_KEYS_READ)
+DEF_SHOW_FUNC(number_multiget_bytes_read, NUMBER_MULTIGET_BYTES_READ)
+DEF_SHOW_FUNC(number_deletes_filtered, NUMBER_FILTERED_DELETES)
+DEF_SHOW_FUNC(number_merge_failures, NUMBER_MERGE_FAILURES)
+DEF_SHOW_FUNC(bloom_filter_prefix_checked, BLOOM_FILTER_PREFIX_CHECKED)
+DEF_SHOW_FUNC(bloom_filter_prefix_useful, BLOOM_FILTER_PREFIX_USEFUL)
+DEF_SHOW_FUNC(number_reseeks_iteration, NUMBER_OF_RESEEKS_IN_ITERATION)
+DEF_SHOW_FUNC(getupdatessince_calls, GET_UPDATES_SINCE_CALLS)
+DEF_SHOW_FUNC(block_cachecompressed_miss, BLOCK_CACHE_COMPRESSED_MISS)
+DEF_SHOW_FUNC(block_cachecompressed_hit, BLOCK_CACHE_COMPRESSED_HIT)
+DEF_SHOW_FUNC(wal_synced, WAL_FILE_SYNCED)
+DEF_SHOW_FUNC(wal_bytes, WAL_FILE_BYTES)
+DEF_SHOW_FUNC(write_self, WRITE_DONE_BY_SELF)
+DEF_SHOW_FUNC(write_other, WRITE_DONE_BY_OTHER)
+DEF_SHOW_FUNC(write_timedout, WRITE_TIMEDOUT)
+DEF_SHOW_FUNC(write_wal, WRITE_WITH_WAL)
+DEF_SHOW_FUNC(flush_write_bytes, FLUSH_WRITE_BYTES)
+DEF_SHOW_FUNC(compact_read_bytes, COMPACT_READ_BYTES)
+DEF_SHOW_FUNC(compact_write_bytes, COMPACT_WRITE_BYTES)
+DEF_SHOW_FUNC(number_superversion_acquires, NUMBER_SUPERVERSION_ACQUIRES)
+DEF_SHOW_FUNC(number_superversion_releases, NUMBER_SUPERVERSION_RELEASES)
+DEF_SHOW_FUNC(number_superversion_cleanups, NUMBER_SUPERVERSION_CLEANUPS)
+DEF_SHOW_FUNC(number_block_not_compressed, NUMBER_BLOCK_NOT_COMPRESSED)
+
+/* Copies the row/query counters of global_stats into export_stats */
+static void myrocks_update_status() {
+  export_stats.rows_deleted = global_stats.rows[ROWS_DELETED];
+  export_stats.rows_inserted = global_stats.rows[ROWS_INSERTED];
+  export_stats.rows_read = global_stats.rows[ROWS_READ];
+  export_stats.rows_updated = global_stats.rows[ROWS_UPDATED];
+  export_stats.rows_deleted_blind = global_stats.rows[ROWS_DELETED_BLIND];
+  export_stats.rows_expired = global_stats.rows[ROWS_EXPIRED];
+  export_stats.rows_filtered = global_stats.rows[ROWS_FILTERED];
+
+  export_stats.system_rows_deleted = global_stats.system_rows[ROWS_DELETED];
+  export_stats.system_rows_inserted = global_stats.system_rows[ROWS_INSERTED];
+  export_stats.system_rows_read = global_stats.system_rows[ROWS_READ];
+  export_stats.system_rows_updated = global_stats.system_rows[ROWS_UPDATED];
+
+  export_stats.queries_point = global_stats.queries[QUERIES_POINT];
+  export_stats.queries_range = global_stats.queries[QUERIES_RANGE];
+
+  export_stats.covered_secondary_key_lookups =
+      global_stats.covered_secondary_key_lookups;
+}
+
+/*
+  Refreshes memory_stats with the memtable usage RocksDB reports for rdb.
+  NOTE(review): the template arguments of the containers below are missing
+  in this copy of the patch - confirm against the upstream file.
+*/
+static void myrocks_update_memory_status() {
+  std::vector dbs;
+  std::unordered_set cache_set;
+  dbs.push_back(rdb);
+  std::map temp_usage_by_type;
+  rocksdb::MemoryUtil::GetApproximateMemoryUsageByType(dbs, cache_set,
+                                                       &temp_usage_by_type);
+  memory_stats.memtable_total =
+      temp_usage_by_type[rocksdb::MemoryUtil::kMemTableTotal];
+  memory_stats.memtable_unflushed =
+      temp_usage_by_type[rocksdb::MemoryUtil::kMemTableUnFlushed];
+}
+
+/* Nested status array returned by show_myrocks_vars() */
+static SHOW_VAR myrocks_status_variables[] = {
+    DEF_STATUS_VAR_FUNC("rows_deleted", &export_stats.rows_deleted,
+                        SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("rows_inserted", &export_stats.rows_inserted,
+                        SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("rows_read", &export_stats.rows_read, SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("rows_updated", &export_stats.rows_updated,
+                        SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("rows_deleted_blind", &export_stats.rows_deleted_blind,
+                        SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("rows_expired", &export_stats.rows_expired,
+                        SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("rows_filtered", &export_stats.rows_filtered,
+                        SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("system_rows_deleted",
+                        &export_stats.system_rows_deleted, SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("system_rows_inserted",
+                        &export_stats.system_rows_inserted, SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("system_rows_read", &export_stats.system_rows_read,
+                        SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("system_rows_updated",
+                        &export_stats.system_rows_updated, SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("memtable_total", &memory_stats.memtable_total,
+                        SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("memtable_unflushed", &memory_stats.memtable_unflushed,
+                        SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("queries_point", &export_stats.queries_point,
+                        SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("queries_range", &export_stats.queries_range,
+                        SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("covered_secondary_key_lookups",
+                        &export_stats.covered_secondary_key_lookups,
+                        SHOW_LONGLONG),
+
+    {NullS, NullS, SHOW_LONG}};
+
+/* Status callback: refreshes both stat sets, then returns the array above */
+static void show_myrocks_vars(THD *thd, SHOW_VAR *var, char *buff) {
+  myrocks_update_status();
+  myrocks_update_memory_status();
+  var->type = SHOW_ARRAY;
+  var->value = reinterpret_cast(&myrocks_status_variables);
+}
+
+/*
+  Returns the numeric value stored under "io_stalls.<key>" in props.
+  A missing key trips a debug assertion and yields 0.
+*/
+static ulonglong io_stall_prop_value(
+    const std::map &props, const std::string &key) {
+  std::map::const_iterator iter =
+      props.find("io_stalls." + key);
+  if (iter != props.end()) {
+    return std::stoull(iter->second);
+  } else {
+    DBUG_PRINT("warning",
+               ("RocksDB GetMapPropery hasn't returned key=%s", key.c_str()));
+    DBUG_ASSERT(0);
+    return 0;
+  }
+}
+
+/*
+  Sums the "rocksdb.cfstats" io_stalls properties over all column families
+  and publishes the totals in io_stall_stats.
+*/
+static void update_rocksdb_stall_status() {
+  st_io_stall_stats local_io_stall_stats;
+  for (const auto &cf_name : cf_manager.get_cf_names()) {
+    rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(cf_name);
+    if (cfh == nullptr) {
+      continue;
+    }
+
+    /* Column families whose properties cannot be read are skipped */
+    std::map props;
+    if (!rdb->GetMapProperty(cfh, "rocksdb.cfstats", &props)) {
+      continue;
+    }
+
+    local_io_stall_stats.level0_slowdown +=
+        io_stall_prop_value(props, "level0_slowdown");
+    local_io_stall_stats.level0_slowdown_with_compaction +=
+        io_stall_prop_value(props, "level0_slowdown_with_compaction");
+    local_io_stall_stats.level0_numfiles +=
+        io_stall_prop_value(props, "level0_numfiles");
+    local_io_stall_stats.level0_numfiles_with_compaction +=
+        io_stall_prop_value(props, "level0_numfiles_with_compaction");
+    local_io_stall_stats.stop_for_pending_compaction_bytes +=
+        io_stall_prop_value(props, "stop_for_pending_compaction_bytes");
+    local_io_stall_stats.slowdown_for_pending_compaction_bytes +=
+        io_stall_prop_value(props, "slowdown_for_pending_compaction_bytes");
+    local_io_stall_stats.memtable_compaction +=
+        io_stall_prop_value(props, "memtable_compaction");
+    local_io_stall_stats.memtable_slowdown +=
+        io_stall_prop_value(props, "memtable_slowdown");
+    local_io_stall_stats.total_stop += io_stall_prop_value(props, "total_stop");
+    local_io_stall_stats.total_slowdown +=
+        io_stall_prop_value(props, "total_slowdown");
+  }
+  io_stall_stats = local_io_stall_stats;
+}
+
+/* Nested status array returned by show_rocksdb_stall_vars() */
+static SHOW_VAR rocksdb_stall_status_variables[] = {
+    DEF_STATUS_VAR_FUNC("l0_file_count_limit_slowdowns",
+                        &io_stall_stats.level0_slowdown, SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("locked_l0_file_count_limit_slowdowns",
+                        &io_stall_stats.level0_slowdown_with_compaction,
+                        SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("l0_file_count_limit_stops",
+                        &io_stall_stats.level0_numfiles, SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("locked_l0_file_count_limit_stops",
+                        &io_stall_stats.level0_numfiles_with_compaction,
+                        SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("pending_compaction_limit_stops",
+                        &io_stall_stats.stop_for_pending_compaction_bytes,
+                        SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("pending_compaction_limit_slowdowns",
+                        &io_stall_stats.slowdown_for_pending_compaction_bytes,
+                        SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("memtable_limit_stops",
+                        &io_stall_stats.memtable_compaction, SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("memtable_limit_slowdowns",
+                        &io_stall_stats.memtable_slowdown, SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("total_stops", &io_stall_stats.total_stop,
+                        SHOW_LONGLONG),
+    DEF_STATUS_VAR_FUNC("total_slowdowns", &io_stall_stats.total_slowdown,
+                        SHOW_LONGLONG),
+    // end of the array marker
+    {NullS, NullS, SHOW_LONG}};
+
+/* Status callback: refreshes io_stall_stats, then returns the array above */
+static void show_rocksdb_stall_vars(THD *thd, SHOW_VAR *var, char *buff) {
+  update_rocksdb_stall_status();
+  var->type = SHOW_ARRAY;
+  var->value = reinterpret_cast(&rocksdb_stall_status_variables);
+}
+
+/*
+  Top-level status variable table: one callback entry per ticker, the
+  directly exported counters, and the two nested arrays defined above.
+*/
+static SHOW_VAR rocksdb_status_vars[] = {
+    DEF_STATUS_VAR(block_cache_miss),
+    DEF_STATUS_VAR(block_cache_hit),
+    DEF_STATUS_VAR(block_cache_add),
+    DEF_STATUS_VAR(block_cache_add_failures),
+    DEF_STATUS_VAR(block_cache_index_miss),
+    DEF_STATUS_VAR(block_cache_index_hit),
+    DEF_STATUS_VAR(block_cache_index_add),
+    DEF_STATUS_VAR(block_cache_index_bytes_insert),
+    DEF_STATUS_VAR(block_cache_index_bytes_evict),
+    DEF_STATUS_VAR(block_cache_filter_miss),
+    DEF_STATUS_VAR(block_cache_filter_hit),
+    DEF_STATUS_VAR(block_cache_filter_add),
+    DEF_STATUS_VAR(block_cache_filter_bytes_insert),
+    DEF_STATUS_VAR(block_cache_filter_bytes_evict),
+    DEF_STATUS_VAR(block_cache_bytes_read),
+    DEF_STATUS_VAR(block_cache_bytes_write),
+    DEF_STATUS_VAR(block_cache_data_bytes_insert),
+    DEF_STATUS_VAR(block_cache_data_miss),
+    DEF_STATUS_VAR(block_cache_data_hit),
+    DEF_STATUS_VAR(block_cache_data_add),
+    DEF_STATUS_VAR(bloom_filter_useful),
+    DEF_STATUS_VAR(bloom_filter_full_positive),
+    DEF_STATUS_VAR(bloom_filter_full_true_positive),
+    DEF_STATUS_VAR(memtable_hit),
+    DEF_STATUS_VAR(memtable_miss),
+    DEF_STATUS_VAR(get_hit_l0),
+    DEF_STATUS_VAR(get_hit_l1),
+    DEF_STATUS_VAR(get_hit_l2_and_up),
+    DEF_STATUS_VAR(compaction_key_drop_new),
+    DEF_STATUS_VAR(compaction_key_drop_obsolete),
+    DEF_STATUS_VAR(compaction_key_drop_user),
+    DEF_STATUS_VAR(number_keys_written),
+    DEF_STATUS_VAR(number_keys_read),
+    DEF_STATUS_VAR(number_keys_updated),
+    DEF_STATUS_VAR(bytes_written),
+    DEF_STATUS_VAR(bytes_read),
+    DEF_STATUS_VAR(number_db_seek),
+    DEF_STATUS_VAR(number_db_seek_found),
+    DEF_STATUS_VAR(number_db_next),
+    DEF_STATUS_VAR(number_db_next_found),
+    DEF_STATUS_VAR(number_db_prev),
+    DEF_STATUS_VAR(number_db_prev_found),
+    DEF_STATUS_VAR(iter_bytes_read),
+    DEF_STATUS_VAR(no_file_closes),
+    DEF_STATUS_VAR(no_file_opens),
+    DEF_STATUS_VAR(no_file_errors),
+    DEF_STATUS_VAR(stall_micros),
+    DEF_STATUS_VAR(num_iterators),
+    DEF_STATUS_VAR(number_multiget_get),
+    DEF_STATUS_VAR(number_multiget_keys_read),
+    DEF_STATUS_VAR(number_multiget_bytes_read),
+    DEF_STATUS_VAR(number_deletes_filtered),
+    DEF_STATUS_VAR(number_merge_failures),
+    DEF_STATUS_VAR(bloom_filter_prefix_checked),
+    DEF_STATUS_VAR(bloom_filter_prefix_useful),
+    DEF_STATUS_VAR(number_reseeks_iteration),
+    DEF_STATUS_VAR(getupdatessince_calls),
+    DEF_STATUS_VAR(block_cachecompressed_miss),
+    DEF_STATUS_VAR(block_cachecompressed_hit),
+    DEF_STATUS_VAR(wal_synced),
+    DEF_STATUS_VAR(wal_bytes),
+    DEF_STATUS_VAR(write_self),
+    DEF_STATUS_VAR(write_other),
+    DEF_STATUS_VAR(write_timedout),
+    DEF_STATUS_VAR(write_wal),
+    DEF_STATUS_VAR(flush_write_bytes),
+    DEF_STATUS_VAR(compact_read_bytes),
+    DEF_STATUS_VAR(compact_write_bytes),
+    DEF_STATUS_VAR(number_superversion_acquires),
+    DEF_STATUS_VAR(number_superversion_releases),
+    DEF_STATUS_VAR(number_superversion_cleanups),
+    DEF_STATUS_VAR(number_block_not_compressed),
+    DEF_STATUS_VAR_PTR("row_lock_deadlocks", &rocksdb_row_lock_deadlocks,
+                       SHOW_LONGLONG),
+    DEF_STATUS_VAR_PTR("row_lock_wait_timeouts",
+                       &rocksdb_row_lock_wait_timeouts, SHOW_LONGLONG),
+    DEF_STATUS_VAR_PTR("snapshot_conflict_errors",
+                       &rocksdb_snapshot_conflict_errors, SHOW_LONGLONG),
+    DEF_STATUS_VAR_PTR("wal_group_syncs", &rocksdb_wal_group_syncs,
+                       SHOW_LONGLONG),
+    DEF_STATUS_VAR_PTR("manual_compactions_processed",
+                       &rocksdb_manual_compactions_processed, SHOW_LONGLONG),
+    DEF_STATUS_VAR_PTR("manual_compactions_running",
+                       &rocksdb_manual_compactions_running, SHOW_LONGLONG),
+    DEF_STATUS_VAR_PTR("number_sst_entry_put", &rocksdb_num_sst_entry_put,
+                       SHOW_LONGLONG),
+    DEF_STATUS_VAR_PTR("number_sst_entry_delete", &rocksdb_num_sst_entry_delete,
+                       SHOW_LONGLONG),
+    DEF_STATUS_VAR_PTR("number_sst_entry_singledelete",
+                       &rocksdb_num_sst_entry_singledelete, SHOW_LONGLONG),
+    DEF_STATUS_VAR_PTR("number_sst_entry_merge", &rocksdb_num_sst_entry_merge,
+                       SHOW_LONGLONG),
+    DEF_STATUS_VAR_PTR("number_sst_entry_other", &rocksdb_num_sst_entry_other,
+                       SHOW_LONGLONG),
+#ifndef DBUG_OFF
+    DEF_STATUS_VAR_PTR("num_get_for_update_calls",
+                       &rocksdb_num_get_for_update_calls, SHOW_LONGLONG),
+#endif
+    // the variables generated by SHOW_FUNC are sorted only by prefix (first
+    // arg in the tuple below), so make sure it is unique to make sorting
+    // deterministic as quick sort is not stable
+    {"rocksdb", reinterpret_cast(&show_myrocks_vars), SHOW_FUNC},
+    {"rocksdb_stall", reinterpret_cast(&show_rocksdb_stall_vars),
+     SHOW_FUNC},
+    {NullS, NullS, SHOW_LONG}};
+
+/*
+  Background thread's main logic
+*/
+
+void Rdb_background_thread::run() {
+  // How many seconds to wait till flushing the WAL next time.
+  const int WAKE_UP_INTERVAL = 1;
+
+  timespec ts_next_sync;
+  set_timespec(ts_next_sync, WAKE_UP_INTERVAL);
+
+  for (;;) {
+    // Wait until the next timeout or until we receive a signal to stop the
+    // thread.
Request to stop the thread should only be triggered when the + // storage engine is being unloaded. + RDB_MUTEX_LOCK_CHECK(m_signal_mutex); + const auto ret MY_ATTRIBUTE((__unused__)) = + mysql_cond_timedwait(&m_signal_cond, &m_signal_mutex, &ts_next_sync); + + // Check that we receive only the expected error codes. + DBUG_ASSERT(ret == 0 || ret == ETIMEDOUT); + const bool local_stop = m_stop; + const bool local_save_stats = m_save_stats; + reset(); + RDB_MUTEX_UNLOCK_CHECK(m_signal_mutex); + + if (local_stop) { + // If we're here then that's because condition variable was signaled by + // another thread and we're shutting down. Break out the loop to make + // sure that shutdown thread can proceed. + break; + } + + // This path should be taken only when the timer expired. + DBUG_ASSERT(ret == ETIMEDOUT); + + if (local_save_stats) { + ddl_manager.persist_stats(); + } + + // Set the next timestamp for mysql_cond_timedwait() (which ends up calling + // pthread_cond_timedwait()) to wait on. + set_timespec(ts_next_sync, WAKE_UP_INTERVAL); + + // Flush the WAL. Sync it for both background and never modes to copy + // InnoDB's behavior. For mode never, the wal file isn't even written, + // whereas background writes to the wal file, but issues the syncs in a + // background thread. + if (rdb && (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_SYNC) && + !rocksdb_db_options->allow_mmap_writes) { + const rocksdb::Status s = rdb->FlushWAL(true); + if (!s.ok()) { + rdb_handle_io_error(s, RDB_IO_ERROR_BG_THREAD); + } + } + // Recalculate statistics for indexes. 
+ if (rocksdb_stats_recalc_rate) { + std::unordered_map> + to_recalc; + + if (rdb_indexes_to_recalc.empty()) { + struct Rdb_index_collector : public Rdb_tables_scanner { + int add_table(Rdb_tbl_def *tdef) override { + for (uint i = 0; i < tdef->m_key_count; i++) { + rdb_indexes_to_recalc.push_back( + tdef->m_key_descr_arr[i]->get_gl_index_id()); + } + return HA_EXIT_SUCCESS; + } + } collector; + ddl_manager.scan_for_tables(&collector); + } + + while (to_recalc.size() < rocksdb_stats_recalc_rate && + !rdb_indexes_to_recalc.empty()) { + const auto index_id = rdb_indexes_to_recalc.back(); + rdb_indexes_to_recalc.pop_back(); + + std::shared_ptr keydef = + ddl_manager.safe_find(index_id); + + if (keydef) { + to_recalc.insert(std::make_pair(keydef->get_gl_index_id(), keydef)); + } + } + + if (!to_recalc.empty()) { + calculate_stats(to_recalc, false); + } + } + + } + + // save remaining stats which might've left unsaved + ddl_manager.persist_stats(); +} + +/* + A background thread to handle manual compactions, + except for dropping indexes/tables. Every second, it checks + pending manual compactions, and it calls CompactRange if there is. +*/ +void Rdb_manual_compaction_thread::run() { + mysql_mutex_init(0, &m_mc_mutex, MY_MUTEX_INIT_FAST); + RDB_MUTEX_LOCK_CHECK(m_signal_mutex); + for (;;) { + if (m_stop) { + break; + } + timespec ts; + set_timespec(ts, 1); + + const auto ret MY_ATTRIBUTE((__unused__)) = + mysql_cond_timedwait(&m_signal_cond, &m_signal_mutex, &ts); + if (m_stop) { + break; + } + // make sure, no program error is returned + DBUG_ASSERT(ret == 0 || ret == ETIMEDOUT); + RDB_MUTEX_UNLOCK_CHECK(m_signal_mutex); + + RDB_MUTEX_LOCK_CHECK(m_mc_mutex); + // Grab the first item and proceed, if not empty. 
+ if (m_requests.empty()) { + RDB_MUTEX_UNLOCK_CHECK(m_mc_mutex); + RDB_MUTEX_LOCK_CHECK(m_signal_mutex); + continue; + } + Manual_compaction_request &mcr = m_requests.begin()->second; + DBUG_ASSERT(mcr.cf != nullptr); + DBUG_ASSERT(mcr.state == Manual_compaction_request::INITED); + mcr.state = Manual_compaction_request::RUNNING; + RDB_MUTEX_UNLOCK_CHECK(m_mc_mutex); + + DBUG_ASSERT(mcr.state == Manual_compaction_request::RUNNING); + // NO_LINT_DEBUG + sql_print_information("Manual Compaction id %d cf %s started.", mcr.mc_id, + mcr.cf->GetName().c_str()); + rocksdb_manual_compactions_running++; + if (rocksdb_debug_manual_compaction_delay > 0) { + my_sleep(rocksdb_debug_manual_compaction_delay * 1000000); + } + // CompactRange may take a very long time. On clean shutdown, + // it is cancelled by CancelAllBackgroundWork, then status is + // set to shutdownInProgress. + const rocksdb::Status s = rdb->CompactRange( + getCompactRangeOptions(mcr.concurrency), mcr.cf, mcr.start, mcr.limit); + rocksdb_manual_compactions_running--; + if (s.ok()) { + // NO_LINT_DEBUG + sql_print_information("Manual Compaction id %d cf %s ended.", mcr.mc_id, + mcr.cf->GetName().c_str()); + } else { + // NO_LINT_DEBUG + sql_print_information("Manual Compaction id %d cf %s aborted. 
%s", + mcr.mc_id, mcr.cf->GetName().c_str(), s.getState()); + if (!s.IsShutdownInProgress()) { + rdb_handle_io_error(s, RDB_IO_ERROR_BG_THREAD); + } else { + DBUG_ASSERT(m_requests.size() == 1); + } + } + rocksdb_manual_compactions_processed++; + clear_manual_compaction_request(mcr.mc_id, false); + RDB_MUTEX_LOCK_CHECK(m_signal_mutex); + } + clear_all_manual_compaction_requests(); + DBUG_ASSERT(m_requests.empty()); + RDB_MUTEX_UNLOCK_CHECK(m_signal_mutex); + mysql_mutex_destroy(&m_mc_mutex); +} + +void Rdb_manual_compaction_thread::clear_all_manual_compaction_requests() { + RDB_MUTEX_LOCK_CHECK(m_mc_mutex); + m_requests.clear(); + RDB_MUTEX_UNLOCK_CHECK(m_mc_mutex); +} + +void Rdb_manual_compaction_thread::clear_manual_compaction_request( + int mc_id, bool init_only) { + bool erase = true; + RDB_MUTEX_LOCK_CHECK(m_mc_mutex); + auto it = m_requests.find(mc_id); + if (it != m_requests.end()) { + if (init_only) { + Manual_compaction_request mcr = it->second; + if (mcr.state != Manual_compaction_request::INITED) { + erase = false; + } + } + if (erase) { + m_requests.erase(it); + } + } else { + // Current code path guarantees that erasing by the same mc_id happens + // at most once. INITED state may be erased by a thread that requested + // the compaction. RUNNING state is erased by mc thread only. 
+ DBUG_ASSERT(0); + } + RDB_MUTEX_UNLOCK_CHECK(m_mc_mutex); +} + +int Rdb_manual_compaction_thread::request_manual_compaction( + rocksdb::ColumnFamilyHandle *cf, rocksdb::Slice *start, + rocksdb::Slice *limit, int concurrency) { + int mc_id = -1; + RDB_MUTEX_LOCK_CHECK(m_mc_mutex); + if (m_requests.size() >= rocksdb_max_manual_compactions) { + RDB_MUTEX_UNLOCK_CHECK(m_mc_mutex); + return mc_id; + } + Manual_compaction_request mcr; + mc_id = mcr.mc_id = ++m_latest_mc_id; + mcr.state = Manual_compaction_request::INITED; + mcr.cf = cf; + mcr.start = start; + mcr.limit = limit; + mcr.concurrency = concurrency; + m_requests.insert(std::make_pair(mcr.mc_id, mcr)); + RDB_MUTEX_UNLOCK_CHECK(m_mc_mutex); + return mc_id; +} + +bool Rdb_manual_compaction_thread::is_manual_compaction_finished(int mc_id) { + bool finished = false; + RDB_MUTEX_LOCK_CHECK(m_mc_mutex); + if (m_requests.count(mc_id) == 0) { + finished = true; + } + RDB_MUTEX_UNLOCK_CHECK(m_mc_mutex); + return finished; +} + +/** + * Locking read + Not Found + Read Committed occurs if we accessed + * a row by Seek, tried to lock it, failed, released and reacquired the + * snapshot (because of READ COMMITTED mode) and the row was deleted by + * someone else in the meantime. + * If so, we either just skipping the row, or re-creating a snapshot + * and seek again. In both cases, Read Committed constraint is not broken. + */ +bool ha_rocksdb::should_skip_invalidated_record(const int rc) { + if ((m_lock_rows != RDB_LOCK_NONE && rc == HA_ERR_KEY_NOT_FOUND && + my_core::thd_tx_isolation(ha_thd()) == ISO_READ_COMMITTED)) { + return true; + } + return false; +} +/** + * Indicating snapshot needs to be re-created and retrying seek again, + * instead of returning errors or empty set. This is normally applicable + * when hitting kBusy when locking the first row of the transaction, + * with Repeatable Read isolation level. 
+ */ +bool ha_rocksdb::should_recreate_snapshot(const int rc, + const bool is_new_snapshot) { + if (should_skip_invalidated_record(rc) || + (rc == HA_ERR_ROCKSDB_STATUS_BUSY && is_new_snapshot)) { + return true; + } + return false; +} + +/** + * If calling put/delete/singledelete without locking the row, + * it is necessary to pass assume_tracked=false to RocksDB TX API. + * Read Free Replication and Blind Deletes are the cases when + * using TX API and skipping row locking. + */ +bool ha_rocksdb::can_assume_tracked(THD *thd) { + if (/* MARIAROCKS_NOT_YET use_read_free_rpl() ||*/ (THDVAR(thd, blind_delete_primary_key))) { + return false; + } + return true; +} + +bool ha_rocksdb::check_bloom_and_set_bounds( + THD *thd, const Rdb_key_def &kd, const rocksdb::Slice &eq_cond, + const bool use_all_keys, size_t bound_len, uchar *const lower_bound, + uchar *const upper_bound, rocksdb::Slice *lower_bound_slice, + rocksdb::Slice *upper_bound_slice) { + bool can_use_bloom = can_use_bloom_filter(thd, kd, eq_cond, use_all_keys); + if (!can_use_bloom) { + setup_iterator_bounds(kd, eq_cond, bound_len, lower_bound, upper_bound, + lower_bound_slice, upper_bound_slice); + } + return can_use_bloom; +} + +/** + Deciding if it is possible to use bloom filter or not. + + @detail + Even if bloom filter exists, it is not always possible + to use bloom filter. If using bloom filter when you shouldn't, + false negative may happen -- fewer rows than expected may be returned. + It is users' responsibility to use bloom filter correctly. + + If bloom filter does not exist, return value does not matter because + RocksDB does not use bloom filter internally. + + @param kd + @param eq_cond Equal condition part of the key. This always includes + system index id (4 bytes). + @param use_all_keys True if all key parts are set with equal conditions. + This is aware of extended keys. 
+*/ +bool ha_rocksdb::can_use_bloom_filter(THD *thd, const Rdb_key_def &kd, + const rocksdb::Slice &eq_cond, + const bool use_all_keys) { + bool can_use = false; + + if (THDVAR(thd, skip_bloom_filter_on_read)) { + return can_use; + } + + const rocksdb::SliceTransform *prefix_extractor = kd.get_extractor(); + if (prefix_extractor) { + /* + This is an optimized use case for CappedPrefixTransform. + If eq_cond length >= prefix extractor length and if + all keys are used for equal lookup, it is + always possible to use bloom filter. + + Prefix bloom filter can't be used on descending scan with + prefix lookup (i.e. WHERE id1=1 ORDER BY id2 DESC), because of + RocksDB's limitation. On ascending (or not sorting) scan, + keys longer than the capped prefix length will be truncated down + to the capped length and the resulting key is added to the bloom filter. + + Keys shorter than the capped prefix length will be added to + the bloom filter. When keys are looked up, key conditionals + longer than the capped length can be used; key conditionals + shorter require all parts of the key to be available + for the short key match. + */ + if ((use_all_keys && prefix_extractor->InRange(eq_cond)) || + prefix_extractor->SameResultWhenAppended(eq_cond)) { + can_use = true; + } else { + can_use = false; + } + } else { + /* + if prefix extractor is not defined, all key parts have to be + used by eq_cond. 
+ */ + if (use_all_keys) { + can_use = true; + } else { + can_use = false; + } + } + + return can_use; +} + +/* For modules that need access to the global data structures */ +rocksdb::TransactionDB *rdb_get_rocksdb_db() { return rdb; } + +Rdb_cf_manager &rdb_get_cf_manager() { return cf_manager; } + +const rocksdb::BlockBasedTableOptions &rdb_get_table_options() { + return *rocksdb_tbl_options; +} + +bool rdb_is_ttl_enabled() { return rocksdb_enable_ttl; } +bool rdb_is_ttl_read_filtering_enabled() { + return rocksdb_enable_ttl_read_filtering; +} +#ifndef DBUG_OFF +int rdb_dbug_set_ttl_rec_ts() { return rocksdb_debug_ttl_rec_ts; } +int rdb_dbug_set_ttl_snapshot_ts() { return rocksdb_debug_ttl_snapshot_ts; } +int rdb_dbug_set_ttl_read_filter_ts() { + return rocksdb_debug_ttl_read_filter_ts; +} +bool rdb_dbug_set_ttl_ignore_pk() { return rocksdb_debug_ttl_ignore_pk; } +#endif + +void rdb_update_global_stats(const operation_type &type, uint count, + bool is_system_table) { + DBUG_ASSERT(type < ROWS_MAX); + + if (count == 0) { + return; + } + + if (is_system_table) { + global_stats.system_rows[type].add(count); + } else { + global_stats.rows[type].add(count); + } +} + +int rdb_get_table_perf_counters(const char *const tablename, + Rdb_perf_counters *const counters) { + DBUG_ASSERT(tablename != nullptr); + + Rdb_table_handler *table_handler; + table_handler = rdb_open_tables.get_table_handler(tablename); + if (table_handler == nullptr) { + return HA_ERR_ROCKSDB_INVALID_TABLE; + } + + counters->load(table_handler->m_table_perf_context); + + rdb_open_tables.release_table_handler(table_handler); + return HA_EXIT_SUCCESS; +} + +const char *get_rdb_io_error_string(const RDB_IO_ERROR_TYPE err_type) { + // If this assertion fails then this means that a member has been either added + // to or removed from RDB_IO_ERROR_TYPE enum and this function needs to be + // changed to return the appropriate value. 
+ static_assert(RDB_IO_ERROR_LAST == 4, "Please handle all the error types."); + + switch (err_type) { + case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_TX_COMMIT: + return "RDB_IO_ERROR_TX_COMMIT"; + case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_DICT_COMMIT: + return "RDB_IO_ERROR_DICT_COMMIT"; + case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_BG_THREAD: + return "RDB_IO_ERROR_BG_THREAD"; + case RDB_IO_ERROR_TYPE::RDB_IO_ERROR_GENERAL: + return "RDB_IO_ERROR_GENERAL"; + default: + DBUG_ASSERT(false); + return "(unknown)"; + } +} + +// In case of core dump generation we want this function NOT to be optimized +// so that we can capture as much data as possible to debug the root cause +// more efficiently. +#ifdef __GNUC__ +#endif +void rdb_handle_io_error(const rocksdb::Status status, + const RDB_IO_ERROR_TYPE err_type) { + if (status.IsIOError()) { + /* skip dumping core if write failed and we are allowed to do so */ +#ifdef MARIAROCKS_NOT_YET + if (skip_core_dump_on_error) { + opt_core_file = false; + } +#endif + switch (err_type) { + case RDB_IO_ERROR_TX_COMMIT: + case RDB_IO_ERROR_DICT_COMMIT: { + rdb_log_status_error(status, "failed to write to WAL"); + /* NO_LINT_DEBUG */ + sql_print_error("MyRocks: aborting on WAL write error."); + abort(); + break; + } + case RDB_IO_ERROR_BG_THREAD: { + rdb_log_status_error(status, "BG thread failed to write to RocksDB"); + /* NO_LINT_DEBUG */ + sql_print_error("MyRocks: aborting on BG write error."); + abort(); + break; + } + case RDB_IO_ERROR_GENERAL: { + rdb_log_status_error(status, "failed on I/O"); + /* NO_LINT_DEBUG */ + sql_print_error("MyRocks: aborting on I/O error."); + abort(); + break; + } + default: + DBUG_ASSERT(0); + break; + } + } else if (status.IsCorruption()) { + rdb_log_status_error(status, "data corruption detected!"); + rdb_persist_corruption_marker(); + /* NO_LINT_DEBUG */ + sql_print_error("MyRocks: aborting because of data corruption."); + abort(); + } else if (!status.ok()) { + switch (err_type) { + case RDB_IO_ERROR_DICT_COMMIT: 
{ + rdb_log_status_error(status, "Failed to write to WAL (dictionary)"); + /* NO_LINT_DEBUG */ + sql_print_error("MyRocks: aborting on WAL write error."); + abort(); + break; + } + default: + rdb_log_status_error(status, "Failed to read/write in RocksDB"); + break; + } + } +} +#ifdef __GNUC__ +#endif +Rdb_dict_manager *rdb_get_dict_manager(void) { return &dict_manager; } + +Rdb_ddl_manager *rdb_get_ddl_manager(void) { return &ddl_manager; } + +Rdb_binlog_manager *rdb_get_binlog_manager(void) { return &binlog_manager; } + +void rocksdb_set_compaction_options( + my_core::THD *const thd MY_ATTRIBUTE((__unused__)), + my_core::st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), + void *const var_ptr, const void *const save) { + if (var_ptr && save) { + *(uint64_t *)var_ptr = *(const uint64_t *)save; + } + const Rdb_compact_params params = { + (uint64_t)rocksdb_compaction_sequential_deletes, + (uint64_t)rocksdb_compaction_sequential_deletes_window, + (uint64_t)rocksdb_compaction_sequential_deletes_file_size}; + if (properties_collector_factory) { + properties_collector_factory->SetCompactionParams(params); + } +} + +void rocksdb_set_table_stats_sampling_pct( + my_core::THD *const thd MY_ATTRIBUTE((__unused__)), + my_core::st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), + void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) { + RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); + + const uint32_t new_val = *static_cast(save); + + if (new_val != rocksdb_table_stats_sampling_pct) { + rocksdb_table_stats_sampling_pct = new_val; + + if (properties_collector_factory) { + properties_collector_factory->SetTableStatsSamplingPct( + rocksdb_table_stats_sampling_pct); + } + } + + RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); +} + +/* + This function allows setting the rate limiter's bytes per second value + but only if the rate limiter is turned on which has to be done at startup. 
+ If the rate is already 0 (turned off) or we are changing it to 0 (trying + to turn it off) this function will push a warning to the client and do + nothing. + This is similar to the code in innodb_doublewrite_update (found in + storage/innobase/handler/ha_innodb.cc). +*/ +void rocksdb_set_rate_limiter_bytes_per_sec( + my_core::THD *const thd, + my_core::st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), + void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) { + const uint64_t new_val = *static_cast(save); + if (new_val == 0 || rocksdb_rate_limiter_bytes_per_sec == 0) { + /* + If a rate_limiter was not enabled at startup we can't change it nor + can we disable it if one was created at startup + */ + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, ER_WRONG_ARGUMENTS, + "RocksDB: rocksdb_rate_limiter_bytes_per_sec cannot " + "be dynamically changed to or from 0. Do a clean " + "shutdown if you want to change it from or to 0."); + } else if (new_val != rocksdb_rate_limiter_bytes_per_sec) { + /* Apply the new value to the rate limiter and store it locally */ + DBUG_ASSERT(rocksdb_rate_limiter != nullptr); + rocksdb_rate_limiter_bytes_per_sec = new_val; + rocksdb_rate_limiter->SetBytesPerSecond(new_val); + } +} + +void rocksdb_set_sst_mgr_rate_bytes_per_sec( + my_core::THD *const thd, + my_core::st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), + void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) { + RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); + + const uint64_t new_val = *static_cast(save); + + if (new_val != rocksdb_sst_mgr_rate_bytes_per_sec) { + rocksdb_sst_mgr_rate_bytes_per_sec = new_val; + + rocksdb_db_options->sst_file_manager->SetDeleteRateBytesPerSecond( + rocksdb_sst_mgr_rate_bytes_per_sec); + } + + RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); +} + +void rocksdb_set_delayed_write_rate(THD *thd, struct st_mysql_sys_var *var, + void *var_ptr, const void *save) { + RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); + 
const uint64_t new_val = *static_cast(save); + if (rocksdb_delayed_write_rate != new_val) { + rocksdb_delayed_write_rate = new_val; + rocksdb::Status s = + rdb->SetDBOptions({{"delayed_write_rate", std::to_string(new_val)}}); + + if (!s.ok()) { + /* NO_LINT_DEBUG */ + sql_print_warning( + "MyRocks: failed to update delayed_write_rate. " + "status code = %d, status = %s", + s.code(), s.ToString().c_str()); + } + } + RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); +} + +void rocksdb_set_max_latest_deadlocks(THD *thd, struct st_mysql_sys_var *var, + void *var_ptr, const void *save) { + RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); + const uint32_t new_val = *static_cast(save); + if (rocksdb_max_latest_deadlocks != new_val) { + rocksdb_max_latest_deadlocks = new_val; + rdb->SetDeadlockInfoBufferSize(rocksdb_max_latest_deadlocks); + } + RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); +} + +void rdb_set_collation_exception_list(const char *const exception_list) { + DBUG_ASSERT(rdb_collation_exceptions != nullptr); + + if (!rdb_collation_exceptions->set_patterns(exception_list)) { + my_core::warn_about_bad_patterns(rdb_collation_exceptions, + "strict_collation_exceptions"); + } +} + +void rocksdb_set_collation_exception_list(THD *const thd, + struct st_mysql_sys_var *const var, + void *const var_ptr, + const void *const save) { + const char *const val = *static_cast(save); + + rdb_set_collation_exception_list(val == nullptr ? "" : val); + + //psergey-todo: what is the purpose of the below?? + const char *val_copy= val? 
my_strdup(PSI_INSTRUMENT_ME, val, MYF(0)): nullptr; + my_free(*static_cast(var_ptr)); + *static_cast(var_ptr) = val_copy; +} + +int mysql_value_to_bool(struct st_mysql_value *value, my_bool *return_value) { + int new_value_type = value->value_type(value); + if (new_value_type == MYSQL_VALUE_TYPE_STRING) { + char buf[16]; + int len = sizeof(buf); + const char *str = value->val_str(value, buf, &len); + if (str && (my_strcasecmp(system_charset_info, "true", str) == 0 || + my_strcasecmp(system_charset_info, "on", str) == 0)) { + *return_value = TRUE; + } else if (str && (my_strcasecmp(system_charset_info, "false", str) == 0 || + my_strcasecmp(system_charset_info, "off", str) == 0)) { + *return_value = FALSE; + } else { + return 1; + } + } else if (new_value_type == MYSQL_VALUE_TYPE_INT) { + long long intbuf; + value->val_int(value, &intbuf); + if (intbuf > 1) return 1; + *return_value = intbuf > 0 ? TRUE : FALSE; + } else { + return 1; + } + + return 0; +} + +int rocksdb_check_bulk_load( + THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), + void *save, struct st_mysql_value *value) { + my_bool new_value; + if (mysql_value_to_bool(value, &new_value) != 0) { + return 1; + } + + Rdb_transaction *tx = get_tx_from_thd(thd); + if (tx != nullptr) { + bool is_critical_error; + const int rc = tx->finish_bulk_load(&is_critical_error); + if (rc != 0 && is_critical_error) { + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: Error %d finalizing last SST file while " + "setting bulk loading variable", + rc); + THDVAR(thd, bulk_load) = 0; + return 1; + } + } + + *static_cast(save) = new_value; + return 0; +} + +int rocksdb_check_bulk_load_allow_unsorted( + THD *const thd, struct st_mysql_sys_var *var MY_ATTRIBUTE((__unused__)), + void *save, struct st_mysql_value *value) { + my_bool new_value; + if (mysql_value_to_bool(value, &new_value) != 0) { + return 1; + } + + if (THDVAR(thd, bulk_load)) { + my_error(ER_ERROR_WHEN_EXECUTING_COMMAND, MYF(0), "SET", + "Cannot 
change this setting while bulk load is enabled"); + + return 1; + } + + *static_cast(save) = new_value; + return 0; +} + +static void rocksdb_set_max_background_jobs(THD *thd, + struct st_mysql_sys_var *const var, + void *const var_ptr, + const void *const save) { + DBUG_ASSERT(save != nullptr); + DBUG_ASSERT(rocksdb_db_options != nullptr); + DBUG_ASSERT(rocksdb_db_options->env != nullptr); + + RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); + + const int new_val = *static_cast(save); + + if (rocksdb_db_options->max_background_jobs != new_val) { + rocksdb_db_options->max_background_jobs = new_val; + rocksdb::Status s = + rdb->SetDBOptions({{"max_background_jobs", std::to_string(new_val)}}); + + if (!s.ok()) { + /* NO_LINT_DEBUG */ + sql_print_warning( + "MyRocks: failed to update max_background_jobs. " + "Status code = %d, status = %s.", + s.code(), s.ToString().c_str()); + } + } + + RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); +} + +/* + Update hook for bytes_per_sync. While holding rdb_sysvars_mutex it reads + the new value from `save`, stores it in rocksdb_db_options and, when it + differs from the current one, pushes it to the running DB through + SetDBOptions(). A failed SetDBOptions() is only logged as a warning that + names the option which could not be updated. +*/ +static void rocksdb_set_bytes_per_sync( + THD *thd MY_ATTRIBUTE((__unused__)), + struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), + void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) { + DBUG_ASSERT(save != nullptr); + DBUG_ASSERT(rocksdb_db_options != nullptr); + DBUG_ASSERT(rocksdb_db_options->env != nullptr); + + RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); + + const ulonglong new_val = *static_cast(save); + + if (rocksdb_db_options->bytes_per_sync != new_val) { + rocksdb_db_options->bytes_per_sync = new_val; + rocksdb::Status s = + rdb->SetDBOptions({{"bytes_per_sync", std::to_string(new_val)}}); + + if (!s.ok()) { + /* NO_LINT_DEBUG */ + sql_print_warning( + "MyRocks: failed to update bytes_per_sync. 
" + "Status code = %d, status = %s.", + s.code(), s.ToString().c_str()); + } + } + + RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); +} + +/* + Update hook for wal_bytes_per_sync. Same flow as the bytes_per_sync hook: + under rdb_sysvars_mutex the new value from `save` is stored in + rocksdb_db_options and applied to the running DB through SetDBOptions(); + a failure is only logged as a warning naming wal_bytes_per_sync. +*/ +static void rocksdb_set_wal_bytes_per_sync( + THD *thd MY_ATTRIBUTE((__unused__)), + struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), + void *const var_ptr MY_ATTRIBUTE((__unused__)), const void *const save) { + DBUG_ASSERT(save != nullptr); + DBUG_ASSERT(rocksdb_db_options != nullptr); + DBUG_ASSERT(rocksdb_db_options->env != nullptr); + + RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); + + const ulonglong new_val = *static_cast(save); + + if (rocksdb_db_options->wal_bytes_per_sync != new_val) { + rocksdb_db_options->wal_bytes_per_sync = new_val; + rocksdb::Status s = + rdb->SetDBOptions({{"wal_bytes_per_sync", std::to_string(new_val)}}); + + if (!s.ok()) { + /* NO_LINT_DEBUG */ + sql_print_warning( + "MyRocks: failed to update wal_bytes_per_sync. " + "Status code = %d, status = %s.", + s.code(), s.ToString().c_str()); + } + } + + RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); +} + +/* + Validating and updating block cache size via sys_var::check path. + SetCapacity may take seconds when reducing block cache, and + sys_var::update holds LOCK_global_system_variables mutex, so + updating block cache size is done at check path instead. 
+*/ +static int rocksdb_validate_set_block_cache_size( + THD *thd MY_ATTRIBUTE((__unused__)), + struct st_mysql_sys_var *const var MY_ATTRIBUTE((__unused__)), + void *var_ptr, struct st_mysql_value *value) { + DBUG_ASSERT(value != nullptr); + + long long new_value; + + /* A non-zero return from val_int() is treated as failure (value is NULL) */ + if (value->val_int(value, &new_value)) { + return HA_EXIT_FAILURE; + } + + /* NOTE(review): once the lower bound holds, the LLONG_MAX comparison on a long long looks like it can never be true -- confirm */ + if (new_value < RDB_MIN_BLOCK_CACHE_SIZE || + (uint64_t)new_value > (uint64_t)LLONG_MAX) { + return HA_EXIT_FAILURE; + } + + RDB_MUTEX_LOCK_CHECK(rdb_block_cache_resize_mutex); + const rocksdb::BlockBasedTableOptions &table_options = + rdb_get_table_options(); + + /* Resize the live cache only when the value changed and a block cache exists */ + if (rocksdb_block_cache_size != new_value && table_options.block_cache) { + table_options.block_cache->SetCapacity(new_value); + } + *static_cast(var_ptr) = static_cast(new_value); + RDB_MUTEX_UNLOCK_CHECK(rdb_block_cache_resize_mutex); + return HA_EXIT_SUCCESS; +} + +/* + Check hook for rocksdb_update_cf_options. Duplicates the incoming string + into *save, rejects it (freeing the copy) when + Rdb_cf_options::parse_cf_options() fails, and otherwise calls + cf_manager.get_or_create_cf() for every column family named in it. + A NULL string is accepted as-is. +*/ +static int rocksdb_validate_update_cf_options( + THD * /* unused */, struct st_mysql_sys_var * /*unused*/, void *save, + struct st_mysql_value *value) { + char buff[STRING_BUFFER_USUAL_SIZE]; + const char *str; + int length; + length = sizeof(buff); + str = value->val_str(value, buff, &length); + // In some cases, str can point to buff in the stack. + // This can cause invalid memory access after validation is finished. + // To avoid this kind of case, always duplicate str if it is not + // nullptr. + *(const char **)save = (str == nullptr) ? nullptr : my_strdup(PSI_INSTRUMENT_ME, str, MYF(0)); + + if (str == nullptr) { + return HA_EXIT_SUCCESS; + } + + Rdb_cf_options::Name_to_config_t option_map; + + // Basic sanity checking and parsing the options into a map. If this fails + // then there's no point to proceed. + if (!Rdb_cf_options::parse_cf_options(str, &option_map)) { + my_error(ER_WRONG_VALUE_FOR_VAR, MYF(0), "rocksdb_update_cf_options", str); + // Free what we've copied with my_strdup above. 
+ my_free((void*)(*(const char **)save)); + return HA_EXIT_FAILURE; + } + // Loop through option_map and create missing column families + for (Rdb_cf_options::Name_to_config_t::iterator it = option_map.begin(); + it != option_map.end(); ++it) { + cf_manager.get_or_create_cf(rdb, it->first); + } + return HA_EXIT_SUCCESS; +} + +/* + Update hook for rocksdb_update_cf_options. Under rdb_sysvars_mutex it + frees the string currently held in *var_ptr, stores the new string pointer + taken from *save there, parses it into per-column-family option strings + and, for every known column family that has an entry, applies the options + with SetOptions() and refreshes the options map kept by cf_manager. + Failures for an individual column family are logged and skipped. If the + string cannot be parsed, it is freed and *var_ptr is reset to nullptr. +*/ +static void rocksdb_set_update_cf_options( + THD *const /* unused */, struct st_mysql_sys_var *const /* unused */, + void *const var_ptr, const void *const save) { + const char *const val = *static_cast(save); + + RDB_MUTEX_LOCK_CHECK(rdb_sysvars_mutex); + + my_free(*reinterpret_cast(var_ptr)); + + if (!val) { + *reinterpret_cast(var_ptr) = nullptr; + RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); + return; + } + + DBUG_ASSERT(val != nullptr); + + // Reset the pointers regardless of how much success we had with updating + // the CF options. This results in consistent behavior and avoids + // dealing with cases when only a subset of CF-s was successfully updated. + *reinterpret_cast(var_ptr) = val; + + // Do the real work of applying the changes. + Rdb_cf_options::Name_to_config_t option_map; + + // This should never fail, because of rocksdb_validate_update_cf_options + if (!Rdb_cf_options::parse_cf_options(val, &option_map)) { + my_free(*reinterpret_cast(var_ptr)); + // Do not leave the variable pointing at the buffer that was just freed; + // a later read or the next update's my_free() would touch freed memory. + *reinterpret_cast(var_ptr) = nullptr; + RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); + return; + } + + // For each CF we have, see if we need to update any settings. + for (const auto &cf_name : cf_manager.get_cf_names()) { + DBUG_ASSERT(!cf_name.empty()); + + rocksdb::ColumnFamilyHandle *cfh = cf_manager.get_cf(cf_name); + DBUG_ASSERT(cfh != nullptr); + + const auto it = option_map.find(cf_name); + std::string per_cf_options = (it != option_map.end()) ? 
it->second : ""; + + if (!per_cf_options.empty()) { + Rdb_cf_options::Name_to_config_t opt_map; + rocksdb::Status s = rocksdb::StringToMap(per_cf_options, &opt_map); + + if (s != rocksdb::Status::OK()) { + // NO_LINT_DEBUG + sql_print_warning( + "MyRocks: failed to convert the options for column " + "family '%s' to a map. %s", + cf_name.c_str(), s.ToString().c_str()); + } else { + DBUG_ASSERT(rdb != nullptr); + + // Finally we can apply the options. + s = rdb->SetOptions(cfh, opt_map); + + if (s != rocksdb::Status::OK()) { + // NO_LINT_DEBUG + sql_print_warning( + "MyRocks: failed to apply the options for column " + "family '%s'. %s", + cf_name.c_str(), s.ToString().c_str()); + } else { + // NO_LINT_DEBUG + sql_print_information( + "MyRocks: options for column family '%s' " + "have been successfully updated.", + cf_name.c_str()); + + // Make sure that data is internally consistent as well and update + // the CF options. This is necessary also to make sure that the CF + // options will be correctly reflected in the relevant table: + // ROCKSDB_CF_OPTIONS in INFORMATION_SCHEMA. + rocksdb::ColumnFamilyOptions cf_options = rdb->GetOptions(cfh); + std::string updated_options; + + s = rocksdb::GetStringFromColumnFamilyOptions(&updated_options, + cf_options); + + DBUG_ASSERT(s == rocksdb::Status::OK()); + DBUG_ASSERT(!updated_options.empty()); + + cf_manager.update_options_map(cf_name, updated_options); + } + } + } + } + + // Our caller (`plugin_var_memalloc_global_update`) will call `my_free` to + // free up resources used before. 
+ + RDB_MUTEX_UNLOCK_CHECK(rdb_sysvars_mutex); +} + +void rdb_queue_save_stats_request() { rdb_bg_thread.request_save_stats(); } + +#ifdef MARIAROCKS_NOT_YET // MDEV-10976 + +void ha_rocksdb::rpl_before_delete_rows() { + DBUG_ENTER_FUNC(); + + m_in_rpl_delete_rows = true; + + DBUG_VOID_RETURN; +} + +void ha_rocksdb::rpl_after_delete_rows() { + DBUG_ENTER_FUNC(); + + m_in_rpl_delete_rows = false; + + DBUG_VOID_RETURN; +} + +void ha_rocksdb::rpl_before_update_rows() { + DBUG_ENTER_FUNC(); + + m_in_rpl_update_rows = true; + + DBUG_VOID_RETURN; +} + +void ha_rocksdb::rpl_after_update_rows() { + DBUG_ENTER_FUNC(); + + m_in_rpl_update_rows = false; + + DBUG_VOID_RETURN; +} + +#if 0 +bool ha_rocksdb::is_read_free_rpl_table() const { + return table->s && m_tbl_def->m_is_read_free_rpl_table; +} +#endif + +/** + @brief + Read Free Replication can be used or not. Returning true means + Read Free Replication can be used. +*/ +bool ha_rocksdb::use_read_free_rpl() const { + DBUG_ENTER_FUNC(); + + if (!ha_thd()->rli_slave || table->triggers || /* !is_read_free_rpl_table()*/ ) { + DBUG_RETURN(false); + } + +#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported + switch (rocksdb_read_free_rpl) { + case read_free_rpl_type::OFF: + DBUG_RETURN(false); + case read_free_rpl_type::PK_ONLY: + DBUG_RETURN(!has_hidden_pk(table) && table->s->keys == 1); + case read_free_rpl_type::PK_SK: + DBUG_RETURN(!has_hidden_pk(table)); + } +#else + DBUG_RETURN(false); +#endif + + DBUG_ASSERT(false); + DBUG_RETURN(false); +} +#endif // MARIAROCKS_NOT_YET + +double ha_rocksdb::read_time(uint index, uint ranges, ha_rows rows) { + DBUG_ENTER_FUNC(); + + if (index != table->s->primary_key) { + /* Non covering index range scan */ + DBUG_RETURN(handler::read_time(index, ranges, rows)); + } + + DBUG_RETURN((rows / 20.0) + 1); +} + +void ha_rocksdb::print_error(int error, myf errflag) { + if (error == HA_ERR_ROCKSDB_STATUS_BUSY) { + error = HA_ERR_LOCK_DEADLOCK; + } + handler::print_error(error, 
errflag); +} + +std::string rdb_corruption_marker_file_name() { + std::string ret(rocksdb_datadir); + ret.append("/ROCKSDB_CORRUPTED"); + return ret; +} + +void sql_print_verbose_info(const char *format, ...) +{ + va_list args; + + if (global_system_variables.log_warnings > 2) { + va_start(args, format); + sql_print_information_v(format, args); + va_end(args); + } +} + +} // namespace myrocks + + +/** + Construct and emit duplicate key error message using information + from table's record buffer. + + @sa print_keydup_error(table, key, msg, errflag, thd, org_table_name). +*/ + +void print_keydup_error(TABLE *table, KEY *key, myf errflag, + const THD *thd, const char *org_table_name) +{ + print_keydup_error(table, key, ER(ER_DUP_ENTRY_WITH_KEY_NAME), errflag); +} + +/* + Register the storage engine plugin outside of myrocks namespace + so that mysql_declare_plugin does not get confused when it does + its name generation. +*/ + + +struct st_mysql_storage_engine rocksdb_storage_engine = { + MYSQL_HANDLERTON_INTERFACE_VERSION}; + +maria_declare_plugin(rocksdb_se){ + MYSQL_STORAGE_ENGINE_PLUGIN, /* Plugin Type */ + &rocksdb_storage_engine, /* Plugin Descriptor */ + "ROCKSDB", /* Plugin Name */ + "Monty Program Ab", /* Plugin Author */ + "RocksDB storage engine", /* Plugin Description */ + PLUGIN_LICENSE_GPL, /* Plugin Licence */ + myrocks::rocksdb_init_func, /* Plugin Entry Point */ + myrocks::rocksdb_done_func, /* Plugin Deinitializer */ + 0x0001, /* version number (0.1) */ + myrocks::rocksdb_status_vars, /* status variables */ + myrocks::rocksdb_system_variables, /* system variables */ + "1.0", /* string version */ + myrocks::MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL +}, + myrocks::rdb_i_s_cfstats, myrocks::rdb_i_s_dbstats, + myrocks::rdb_i_s_perf_context, myrocks::rdb_i_s_perf_context_global, + myrocks::rdb_i_s_cfoptions, myrocks::rdb_i_s_compact_stats, + myrocks::rdb_i_s_global_info, myrocks::rdb_i_s_ddl, + myrocks::rdb_i_s_sst_props, myrocks::rdb_i_s_index_file_map, + 
myrocks::rdb_i_s_lock_info, myrocks::rdb_i_s_trx_info, + myrocks::rdb_i_s_deadlock_info +maria_declare_plugin_end; diff --git a/storage/rocksdb/ha_rocksdb.h b/storage/rocksdb/ha_rocksdb.h new file mode 100644 index 00000000..05b5341b --- /dev/null +++ b/storage/rocksdb/ha_rocksdb.h @@ -0,0 +1,1069 @@ +/* + Copyright (c) 2012,2013 Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ +#pragma once + +#ifdef USE_PRAGMA_INTERFACE +#pragma interface /* gcc class implementation */ +#endif + +/* C++ standard header files */ +#include +#include +#include +#include +#include + +/* MySQL header files */ +#include "./handler.h" /* handler */ +#include "./my_global.h" /* ulonglong */ +#include "./sql_string.h" + +#ifdef _WIN32 +#undef pthread_key_create +#undef pthread_key_delete +#undef pthread_setspecific +#undef pthread_getspecific +#endif + + +/* RocksDB header files */ +#include "rocksdb/cache.h" +#include "rocksdb/merge_operator.h" +#include "rocksdb/perf_context.h" +#include "rocksdb/sst_file_manager.h" +#include "rocksdb/statistics.h" +#include "rocksdb/utilities/options_util.h" +#include "rocksdb/utilities/transaction_db.h" +#include "rocksdb/utilities/write_batch_with_index.h" + +/* MyRocks header files */ +#include "./rdb_buff.h" +#include "./rdb_comparator.h" +#include "./rdb_global.h" +#include "./rdb_index_merge.h" +#include "./rdb_io_watchdog.h" 
+#include "./rdb_perf_context.h" +#include "./rdb_sst_info.h" +#include "./rdb_utils.h" +#include "./ut0counter.h" + +#include "rdb_mariadb_server_port.h" + +class Field_blob; +class Field_varstring; +/** + @note MyRocks Coding Conventions: + MyRocks code follows the baseline MySQL coding conventions, available at + http://dev.mysql.com/doc/internals/en/coding-guidelines.html, with several + refinements (@see /storage/rocksdb/README file). +*/ + +namespace myrocks { + +class Rdb_converter; +class Rdb_key_def; +class Rdb_tbl_def; +class Rdb_transaction; +class Rdb_transaction_impl; +class Rdb_writebatch_impl; +class Rdb_field_encoder; +/* collations, used in MariaRocks */ +enum collations_used { + COLLATION_UTF8MB4_BIN = 46, + COLLATION_LATIN1_BIN = 47, + COLLATION_UTF16LE_BIN = 55, + COLLATION_UTF32_BIN = 61, + COLLATION_UTF16_BIN = 62, + COLLATION_BINARY = 63, + COLLATION_UTF8_BIN = 83 +}; + +#if 0 // MARIAROCKS_NOT_YET : read-free replication is not supported +extern char *rocksdb_read_free_rpl_tables; +#if defined(HAVE_PSI_INTERFACE) +extern PSI_rwlock_key key_rwlock_read_free_rpl_tables; +#endif +extern Regex_list_handler rdb_read_free_regex_handler; +#endif + +/** + @brief + Rdb_table_handler is a reference-counted structure storing information for + each open table. All the objects are stored in a global hash map. + + //TODO: join this with Rdb_tbl_def ? 
+*/ +struct Rdb_table_handler { + char *m_table_name; + uint m_table_name_length; + int m_ref_count; + atomic_stat m_lock_wait_timeout_counter; + atomic_stat m_deadlock_counter; + + my_core::THR_LOCK m_thr_lock; ///< MySQL latch needed by m_db_lock + + /* Stores cumulative table statistics */ + my_io_perf_atomic_t m_io_perf_read; + my_io_perf_atomic_t m_io_perf_write; + Rdb_atomic_perf_counters m_table_perf_context; + + /* Stores cached memtable estimate statistics */ + std::atomic_uint m_mtcache_lock; + uint64_t m_mtcache_count; + uint64_t m_mtcache_size; + uint64_t m_mtcache_last_update; +}; + +} // namespace myrocks + +/* Provide hash function for GL_INDEX_ID so we can include it in sets */ +namespace std { +template <> +struct hash { + std::size_t operator()(const myrocks::GL_INDEX_ID &gl_index_id) const { + const uint64_t val = + ((uint64_t)gl_index_id.cf_id << 32 | (uint64_t)gl_index_id.index_id); + return std::hash()(val); + } +}; +} // namespace std + +namespace myrocks { + +/** + @brief + Class definition for ROCKSDB storage engine plugin handler +*/ + +class ha_rocksdb : public my_core::handler { + my_core::THR_LOCK_DATA m_db_lock; ///< MySQL database lock + + Rdb_table_handler *m_table_handler; ///< Open table handler + + /* Iterator used for range scans and for full table/index scans */ + rocksdb::Iterator *m_scan_it; + + /* Same as handler::end_key but for start. Reverse-ordered scans need it */ + key_range m_save_start_range; + const key_range *m_start_range; + + /* Whether m_scan_it was created with skip_bloom=true */ + bool m_scan_it_skips_bloom; + + const rocksdb::Snapshot *m_scan_it_snapshot; + + /* Buffers used for upper/lower bounds for m_scan_it. 
*/ + uchar *m_scan_it_lower_bound; + uchar *m_scan_it_upper_bound; + rocksdb::Slice m_scan_it_lower_bound_slice; + rocksdb::Slice m_scan_it_upper_bound_slice; + + Rdb_tbl_def *m_tbl_def; + + /* Primary Key encoder from KeyTupleFormat to StorageFormat */ + std::shared_ptr m_pk_descr; + + /* Array of index descriptors */ + std::shared_ptr *m_key_descr_arr; + + bool check_keyread_allowed(uint inx, uint part, bool all_parts) const; + + /* + Number of key parts in PK. This is the same as + table->key_info[table->s->primary_key].keyparts + */ + uint m_pk_key_parts; + + /* + TRUE <=> Primary Key columns can be decoded from the index + */ + mutable bool m_pk_can_be_decoded; + + uchar *m_pk_tuple; /* Buffer for storing PK in KeyTupleFormat */ + uchar *m_pk_packed_tuple; /* Buffer for storing PK in StorageFormat */ + // ^^ todo: change it to 'char*'? TODO: ^ can we join this with last_rowkey? + + /* + Temporary buffers for storing the key part of the Key/Value pair + for secondary indexes. + */ + uchar *m_sk_packed_tuple; + + /* + Temporary buffers for storing end key part of the Key/Value pair. + This is used for range scan only. + */ + uchar *m_end_key_packed_tuple; + + Rdb_string_writer m_sk_tails; + Rdb_string_writer m_pk_unpack_info; + + /* + ha_rockdb->index_read_map(.. HA_READ_KEY_EXACT or similar) will save here + mem-comparable form of the index lookup tuple. + */ + uchar *m_sk_match_prefix; + uint m_sk_match_length; + + /* Buffer space for the above */ + uchar *m_sk_match_prefix_buf; + + /* Second buffers, used by UPDATE. */ + uchar *m_sk_packed_tuple_old; + Rdb_string_writer m_sk_tails_old; + + /* Buffers used for duplicate checking during unique_index_creation */ + uchar *m_dup_sk_packed_tuple; + uchar *m_dup_sk_packed_tuple_old; + + /* + Temporary space for packing VARCHARs (we provide it to + pack_record()/pack_index_tuple() calls). 
+ */ + uchar *m_pack_buffer; + + /* + A buffer long enough to store table record + */ + uchar *m_record_buffer; + + /* class to convert between Mysql format and RocksDB format*/ + std::shared_ptr m_converter; + + /* + Pointer to the original TTL timestamp value (8 bytes) during UPDATE. + */ + char *m_ttl_bytes; + /* + The TTL timestamp value can change if the explicit TTL column is + updated. If we detect this when updating the PK, we indicate it here so + we know we must always update any SK's. + */ + bool m_ttl_bytes_updated; + + /* rowkey of the last record we've read, in StorageFormat. */ + String m_last_rowkey; + + /* + Last retrieved record, in table->record[0] data format. + + This is used only when we get the record with rocksdb's Get() call (The + other option is when we get a rocksdb::Slice from an iterator) + */ + rocksdb::PinnableSlice m_retrieved_record; + + /* Type of locking to apply to rows */ + enum { RDB_LOCK_NONE, RDB_LOCK_READ, RDB_LOCK_WRITE } m_lock_rows; + + /* TRUE means we're doing an index-only read. FALSE means otherwise. */ + bool m_keyread_only; + + bool m_skip_scan_it_next_call; + + /* TRUE means we are accessing the first row after a snapshot was created */ + bool m_rnd_scan_is_new_snapshot; + + /* + TRUE means we should skip unique key checks for this table if the + replication lag gets too large + */ + bool m_skip_unique_check; + + /* + TRUE means INSERT ON DUPLICATE KEY UPDATE. In such case we can optimize by + remember the failed attempt (if there is one that violates uniqueness check) + in write_row and in the following index_read to skip the lock check and read + entirely + */ + bool m_insert_with_update; + + /* TRUE if last time the insertion failed due to duplicated PK */ + bool m_dup_pk_found; + +#ifndef DBUG_OFF + /* Last retreived record for sanity checking */ + String m_dup_pk_retrieved_record; +#endif + + /** + @brief + This is a bitmap of indexes (i.e. 
a set) whose keys (in future, values) may + be changed by this statement. Indexes that are not in the bitmap do not need + to be updated. + @note Valid inside UPDATE statements, IIF(m_update_scope_is_valid == true). + */ + my_core::key_map m_update_scope; + bool m_update_scope_is_valid; + + /* SST information used for bulk loading the primary key */ + std::shared_ptr m_sst_info; + + /* + MySQL index number for duplicate key error + */ + uint m_dupp_errkey; + + int create_key_defs(const TABLE *const table_arg, + Rdb_tbl_def *const tbl_def_arg, + const TABLE *const old_table_arg = nullptr, + const Rdb_tbl_def *const old_tbl_def_arg = nullptr) const + MY_ATTRIBUTE((__nonnull__(2, 3), __warn_unused_result__)); + int secondary_index_read(const int keyno, uchar *const buf) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + void setup_iterator_for_rnd_scan(); + bool is_ascending(const Rdb_key_def &keydef, + enum ha_rkey_function find_flag) const + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + void setup_iterator_bounds(const Rdb_key_def &kd, + const rocksdb::Slice &eq_cond, size_t bound_len, + uchar *const lower_bound, uchar *const upper_bound, + rocksdb::Slice *lower_bound_slice, + rocksdb::Slice *upper_bound_slice); + bool can_use_bloom_filter(THD *thd, const Rdb_key_def &kd, + const rocksdb::Slice &eq_cond, + const bool use_all_keys); + bool check_bloom_and_set_bounds(THD *thd, const Rdb_key_def &kd, + const rocksdb::Slice &eq_cond, + const bool use_all_keys, size_t bound_len, + uchar *const lower_bound, + uchar *const upper_bound, + rocksdb::Slice *lower_bound_slice, + rocksdb::Slice *upper_bound_slice); + void setup_scan_iterator(const Rdb_key_def &kd, rocksdb::Slice *slice, + const bool use_all_keys, const uint eq_cond_len) + MY_ATTRIBUTE((__nonnull__)); + void release_scan_iterator(void); + + rocksdb::Status get_for_update( + Rdb_transaction *const tx, + rocksdb::ColumnFamilyHandle *const column_family, + const rocksdb::Slice &key, 
rocksdb::PinnableSlice *value) const; + + int get_row_by_rowid(uchar *const buf, const char *const rowid, + const uint rowid_size, const bool skip_lookup = false, + const bool skip_ttl_check = true) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + int get_row_by_rowid(uchar *const buf, const uchar *const rowid, + const uint rowid_size, const bool skip_lookup = false, + const bool skip_ttl_check = true) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)) { + return get_row_by_rowid(buf, reinterpret_cast(rowid), + rowid_size, skip_lookup, skip_ttl_check); + } + + void load_auto_incr_value(); + ulonglong load_auto_incr_value_from_index(); + void update_auto_incr_val(ulonglong val); + void update_auto_incr_val_from_field(); + rocksdb::Status get_datadic_auto_incr(Rdb_transaction *const tx, + const GL_INDEX_ID &gl_index_id, + ulonglong *new_val) const; + longlong update_hidden_pk_val(); + int load_hidden_pk_value() MY_ATTRIBUTE((__warn_unused_result__)); + int read_hidden_pk_id_from_rowkey(longlong *const hidden_pk_id) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + bool can_use_single_delete(const uint index) const + MY_ATTRIBUTE((__warn_unused_result__)); + bool is_blind_delete_enabled(); + bool skip_unique_check() const MY_ATTRIBUTE((__warn_unused_result__)); +#ifdef MARIAROCKS_NOT_YET // MDEV-10975 + void set_force_skip_unique_check(bool skip) override; +#endif + bool commit_in_the_middle() MY_ATTRIBUTE((__warn_unused_result__)); + bool do_bulk_commit(Rdb_transaction *const tx) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + bool has_hidden_pk(const TABLE *const table) const + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + void update_row_stats(const operation_type &type); + + void set_last_rowkey(const uchar *const old_data); + + /* + For the active index, indicates which columns must be covered for the + current lookup to be covered. 
If the bitmap field is null, that means this + index does not cover the current lookup for any record. + */ + MY_BITMAP m_lookup_bitmap = {nullptr, nullptr, nullptr, 0, 0}; + + int alloc_key_buffers(const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg, + bool alloc_alter_buffers = false) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + void free_key_buffers(); + + // the buffer size should be at least 2*Rdb_key_def::INDEX_NUMBER_SIZE + rocksdb::Range get_range(const int i, uchar buf[]) const; + + /* + Perf timers for data reads + */ + Rdb_io_perf m_io_perf; + + /* + Update stats + */ + void update_stats(void); + + public: + /* + The following two are currently only used for getting the range bounds + from QUICK_SELECT_DESC. + We don't need to implement prepare_index_key_scan[_map] because it is + only used with HA_READ_KEY_EXACT and HA_READ_PREFIX_LAST where one + can infer the bounds of the range being scanned, anyway. + */ + int prepare_index_scan() override; + int prepare_range_scan(const key_range *start_key, + const key_range *end_key) override; + + /* + Controls whether writes include checksums. This is updated from the session + variable + at the start of each query. + */ + bool m_store_row_debug_checksums; + + int m_checksums_pct; + + ha_rocksdb(my_core::handlerton *const hton, + my_core::TABLE_SHARE *const table_arg); + virtual ~ha_rocksdb() override { + int err MY_ATTRIBUTE((__unused__)); + err = finalize_bulk_load(false); + if (err != 0) { + // NO_LINT_DEBUG + sql_print_error( + "RocksDB: Error %d finalizing bulk load while closing " + "handler.", + err); + } + } + + /** @brief + The name that will be used for display purposes. + */ + const char *table_type() const /*override*/ { + DBUG_ENTER_FUNC(); + // MariaDB: this function is not virtual, however ha_innodb + // declares it (and then never uses!) psergey-merge-todo:. 
+ DBUG_RETURN(rocksdb_hton_name); + } + + /* The following is only used by SHOW KEYS: */ + const char *index_type(uint inx) override { + DBUG_ENTER_FUNC(); + + DBUG_RETURN("LSMTREE"); + } + + /* + Not present in MariaDB: + const char **bas_ext() const override; + */ + + /* + Returns the name of the table's base name + */ + const std::string &get_table_basename() const; + + /** @brief + This is a list of flags that indicate what functionality the storage engine + implements. The current table flags are documented in handler.h + */ + ulonglong table_flags() const override ; +private: + bool init_with_fields(); /* no 'override' in MariaDB */ +public: + /** @brief + This is a bitmap of flags that indicates how the storage engine + implements indexes. The current index flags are documented in + handler.h. If you do not implement indexes, just return zero here. + + @details + part is the key part to check. First key part is 0. + If all_parts is set, MySQL wants to know the flags for the combined + index, up to and including 'part'. 
+ */ + ulong index_flags(uint inx, uint part, bool all_parts) const override; + + const key_map *keys_to_use_for_scanning() override { + DBUG_ENTER_FUNC(); + + DBUG_RETURN(&key_map_full); + } + + bool should_store_row_debug_checksums() const { + return m_store_row_debug_checksums && (rand() % 100 < m_checksums_pct); + } + + int rename_table(const char *const from, const char *const to) override + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + int convert_record_from_storage_format(const rocksdb::Slice *const key, + const rocksdb::Slice *const value, + uchar *const buf) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + int convert_record_from_storage_format(const rocksdb::Slice *const key, + uchar *const buf) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + static const std::vector parse_into_tokens(const std::string &s, + const char delim); + + static const std::string generate_cf_name( + const uint index, const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg, bool *per_part_match_found); + + static const char *get_key_name(const uint index, + const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + static const char *get_key_comment(const uint index, + const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + static const std::string get_table_comment(const TABLE *const table_arg) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + static bool is_hidden_pk(const uint index, const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + static uint pk_index(const TABLE *const table_arg, + const Rdb_tbl_def *const tbl_def_arg) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + static bool is_pk(const uint index, const TABLE *table_arg, + const Rdb_tbl_def *tbl_def_arg) + MY_ATTRIBUTE((__nonnull__, 
__warn_unused_result__)); + /** @brief + unireg.cc will call max_supported_record_length(), max_supported_keys(), + max_supported_key_parts(), uint max_supported_key_length() + to make sure that the storage engine can handle the data it is about to + send. Return *real* limits of your storage engine here; MySQL will do + min(your_limits, MySQL_limits) automatically. + */ + uint max_supported_record_length() const override { + DBUG_ENTER_FUNC(); + + DBUG_RETURN(HA_MAX_REC_LENGTH); + } + + uint max_supported_keys() const override { + DBUG_ENTER_FUNC(); + + DBUG_RETURN(MAX_INDEXES); + } + + uint max_supported_key_parts() const override { + DBUG_ENTER_FUNC(); + + DBUG_RETURN(MAX_REF_PARTS); + } + + uint max_supported_key_part_length() const override; + + /** @brief + unireg.cc will call this to make sure that the storage engine can handle + the data it is about to send. Return *real* limits of your storage engine + here; MySQL will do min(your_limits, MySQL_limits) automatically. + + @details + There is no need to implement ..._key_... methods if your engine doesn't + support indexes. + */ + uint max_supported_key_length() const override { + DBUG_ENTER_FUNC(); + + DBUG_RETURN(16 * 1024); /* just to return something*/ + } + + /** + TODO: return actual upper bound of number of records in the table. + (e.g. save number of records seen on full table scan and/or use file size + as upper bound) + */ + ha_rows estimate_rows_upper_bound() override { + DBUG_ENTER_FUNC(); + + DBUG_RETURN(HA_POS_ERROR); + } + + /* At the moment, we're ok with default handler::index_init() implementation. 
+ */ + int index_read_map(uchar *const buf, const uchar *const key, + key_part_map keypart_map, + enum ha_rkey_function find_flag) override + MY_ATTRIBUTE((__warn_unused_result__)); + + int index_read_map_impl(uchar *const buf, const uchar *const key, + key_part_map keypart_map, + enum ha_rkey_function find_flag, + const key_range *end_key) + MY_ATTRIBUTE((__warn_unused_result__)); + + bool is_using_full_key(key_part_map keypart_map, uint actual_key_parts); + int read_range_first(const key_range *const start_key, + const key_range *const end_key, bool eq_range, + bool sorted) override + MY_ATTRIBUTE((__warn_unused_result__)); + + virtual double scan_time() override { + DBUG_ENTER_FUNC(); + + DBUG_RETURN( + static_cast((stats.records + stats.deleted) / 20.0 + 10)); + } + + virtual double read_time(uint, uint, ha_rows rows) override; + virtual void print_error(int error, myf errflag) override; + + int open(const char *const name, int mode, uint test_if_locked) override + MY_ATTRIBUTE((__warn_unused_result__)); + int close(void) override MY_ATTRIBUTE((__warn_unused_result__)); + + int write_row(const uchar *const buf) override + MY_ATTRIBUTE((__warn_unused_result__)); + int update_row(const uchar *const old_data, const uchar *const new_data) override + MY_ATTRIBUTE((__warn_unused_result__)); + int delete_row(const uchar *const buf) override + MY_ATTRIBUTE((__warn_unused_result__)); + rocksdb::Status delete_or_singledelete(uint index, Rdb_transaction *const tx, + rocksdb::ColumnFamilyHandle *const cf, + const rocksdb::Slice &key) + MY_ATTRIBUTE((__warn_unused_result__)); + + int index_next(uchar *const buf) override + MY_ATTRIBUTE((__warn_unused_result__)); + int index_next_with_direction(uchar *const buf, bool move_forward) + MY_ATTRIBUTE((__warn_unused_result__)); + int index_prev(uchar *const buf) override + MY_ATTRIBUTE((__warn_unused_result__)); + + int index_first(uchar *const buf) override + MY_ATTRIBUTE((__warn_unused_result__)); + int index_last(uchar *const 
buf) override + MY_ATTRIBUTE((__warn_unused_result__)); + + class Item *idx_cond_push(uint keyno, class Item *const idx_cond) override; + /* + Default implementation from cancel_pushed_idx_cond() suits us + */ + private: + struct key_def_cf_info { + rocksdb::ColumnFamilyHandle *cf_handle; + bool is_reverse_cf; + bool is_per_partition_cf; + }; + + struct update_row_info { + Rdb_transaction *tx; + const uchar *new_data; + const uchar *old_data; + rocksdb::Slice new_pk_slice; + rocksdb::Slice old_pk_slice; + rocksdb::Slice old_pk_rec; + + // "unpack_info" data for the new PK value + Rdb_string_writer *new_pk_unpack_info; + + longlong hidden_pk_id; + bool skip_unique_check; + }; + + /* + Used to check for duplicate entries during fast unique secondary index + creation. + */ + struct unique_sk_buf_info { + bool sk_buf_switch = false; + rocksdb::Slice sk_memcmp_key; + rocksdb::Slice sk_memcmp_key_old; + uchar *dup_sk_buf; + uchar *dup_sk_buf_old; + + /* + This method is meant to be called back to back during inplace creation + of unique indexes. It will switch between two buffers, which + will each store the memcmp form of secondary keys, which are then + converted to slices in sk_memcmp_key or sk_memcmp_key_old. + + Switching buffers on each iteration allows us to retain the + sk_memcmp_key_old value for duplicate comparison. + */ + inline uchar *swap_and_get_sk_buf() { + sk_buf_switch = !sk_buf_switch; + return sk_buf_switch ? 
dup_sk_buf : dup_sk_buf_old; + } + }; + + int create_cfs(const TABLE *const table_arg, Rdb_tbl_def *const tbl_def_arg, + std::array *const cfs) + const MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + int create_key_def(const TABLE *const table_arg, const uint i, + const Rdb_tbl_def *const tbl_def_arg, + std::shared_ptr *const new_key_def, + const struct key_def_cf_info &cf_info, uint64 ttl_duration, + const std::string &ttl_column) const + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + int create_inplace_key_defs( + const TABLE *const table_arg, Rdb_tbl_def *vtbl_def_arg, + const TABLE *const old_table_arg, + const Rdb_tbl_def *const old_tbl_def_arg, + const std::array &cf, + uint64 ttl_duration, const std::string &ttl_column) const + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + std::unordered_map get_old_key_positions( + const TABLE *table_arg, const Rdb_tbl_def *tbl_def_arg, + const TABLE *old_table_arg, const Rdb_tbl_def *old_tbl_def_arg) const + MY_ATTRIBUTE((__nonnull__)); + + using handler::compare_key_parts; + int compare_key_parts(const KEY *const old_key, + const KEY *const new_key) const + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + int compare_keys(const KEY *const old_key, const KEY *const new_key) const + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + bool should_hide_ttl_rec(const Rdb_key_def &kd, + const rocksdb::Slice &ttl_rec_val, + const int64_t curr_ts) + MY_ATTRIBUTE((__warn_unused_result__)); + int rocksdb_skip_expired_records(const Rdb_key_def &kd, + rocksdb::Iterator *const iter, + bool seek_backward); + + int index_first_intern(uchar *buf) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + int index_last_intern(uchar *buf) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + int find_icp_matching_index_rec(const bool move_forward, uchar *const buf) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + void calc_updated_indexes(); + int update_write_row(const uchar *const 
old_data, const uchar *const new_data, + const bool skip_unique_check) + MY_ATTRIBUTE((__warn_unused_result__)); + int get_pk_for_update(struct update_row_info *const row_info); + int check_and_lock_unique_pk(const uint key_id, + const struct update_row_info &row_info, + bool *const found) + MY_ATTRIBUTE((__warn_unused_result__)); + int check_and_lock_sk(const uint key_id, + const struct update_row_info &row_info, + bool *const found) + MY_ATTRIBUTE((__warn_unused_result__)); + int check_uniqueness_and_lock(const struct update_row_info &row_info, + bool pk_changed) + MY_ATTRIBUTE((__warn_unused_result__)); + bool over_bulk_load_threshold(int *err) + MY_ATTRIBUTE((__warn_unused_result__)); + int check_duplicate_sk(const TABLE *table_arg, const Rdb_key_def &key_def, + const rocksdb::Slice *key, + struct unique_sk_buf_info *sk_info) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + int bulk_load_key(Rdb_transaction *const tx, const Rdb_key_def &kd, + const rocksdb::Slice &key, const rocksdb::Slice &value, + bool sort) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + void update_bytes_written(ulonglong bytes_written); + int update_write_pk(const Rdb_key_def &kd, + const struct update_row_info &row_info, + const bool pk_changed) + MY_ATTRIBUTE((__warn_unused_result__)); + int update_write_sk(const TABLE *const table_arg, const Rdb_key_def &kd, + const struct update_row_info &row_info, + const bool bulk_load_sk) + MY_ATTRIBUTE((__warn_unused_result__)); + int update_write_indexes(const struct update_row_info &row_info, + const bool pk_changed) + MY_ATTRIBUTE((__warn_unused_result__)); + + int read_key_exact(const Rdb_key_def &kd, rocksdb::Iterator *const iter, + const bool using_full_key, const rocksdb::Slice &key_slice, + const int64_t ttl_filter_ts) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + int read_before_key(const Rdb_key_def &kd, const bool using_full_key, + const rocksdb::Slice &key_slice, + const int64_t ttl_filter_ts) + 
MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + int read_after_key(const Rdb_key_def &kd, const rocksdb::Slice &key_slice, + const int64_t ttl_filter_ts) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + int position_to_correct_key(const Rdb_key_def &kd, + const enum ha_rkey_function &find_flag, + const bool full_key_match, const uchar *const key, + const key_part_map &keypart_map, + const rocksdb::Slice &key_slice, + bool *const move_forward, + const int64_t ttl_filter_ts) + MY_ATTRIBUTE((__warn_unused_result__)); + + int read_row_from_primary_key(uchar *const buf) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + int read_row_from_secondary_key(uchar *const buf, const Rdb_key_def &kd, + bool move_forward) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + int calc_eq_cond_len(const Rdb_key_def &kd, + const enum ha_rkey_function &find_flag, + const rocksdb::Slice &slice, + const int bytes_changed_by_succ, + const key_range *const end_key, + uint *const end_key_packed_size) + MY_ATTRIBUTE((__warn_unused_result__)); + + Rdb_tbl_def *get_table_if_exists(const char *const tablename) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + void read_thd_vars(THD *const thd) MY_ATTRIBUTE((__nonnull__)); + + bool contains_foreign_key(THD *const thd) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + int inplace_populate_sk( + TABLE *const table_arg, + const std::unordered_set> &indexes) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + + int finalize_bulk_load(bool print_client_error = true) + MY_ATTRIBUTE((__warn_unused_result__)); + + int calculate_stats_for_table() MY_ATTRIBUTE((__warn_unused_result__)); + + bool should_skip_invalidated_record(const int rc); + bool should_recreate_snapshot(const int rc, const bool is_new_snapshot); + bool can_assume_tracked(THD *thd); + + public: + int index_init(uint idx, bool sorted) override + MY_ATTRIBUTE((__warn_unused_result__)); + int index_end() override 
MY_ATTRIBUTE((__warn_unused_result__)); + + void unlock_row() override; + + /** @brief + Unlike index_init(), rnd_init() can be called two consecutive times + without rnd_end() in between (it only makes sense if scan=1). In this + case, the second call should prepare for the new table scan (e.g if + rnd_init() allocates the cursor, the second call should position the + cursor to the start of the table; no need to deallocate and allocate + it again. This is a required method. + */ + int rnd_init(bool scan) override MY_ATTRIBUTE((__warn_unused_result__)); + int rnd_end() override MY_ATTRIBUTE((__warn_unused_result__)); + + int rnd_next(uchar *const buf) override + MY_ATTRIBUTE((__warn_unused_result__)); + int rnd_next_with_direction(uchar *const buf, bool move_forward) + MY_ATTRIBUTE((__warn_unused_result__)); + + int rnd_pos(uchar *const buf, uchar *const pos) override + MY_ATTRIBUTE((__warn_unused_result__)); + void position(const uchar *const record) override; + int info(uint) override; + + /* This function will always return success, therefore no annotation related + * to checking the return value. Can't change the signature because it's + * required by the interface. 
*/ + int extra(enum ha_extra_function operation) override; + + int start_stmt(THD *const thd, thr_lock_type lock_type) override + MY_ATTRIBUTE((__warn_unused_result__)); + int external_lock(THD *const thd, int lock_type) override + MY_ATTRIBUTE((__warn_unused_result__)); + int truncate() override MY_ATTRIBUTE((__warn_unused_result__)); + + int reset() override { + DBUG_ENTER_FUNC(); + + /* Free blob data */ + m_retrieved_record.Reset(); + + DBUG_RETURN(HA_EXIT_SUCCESS); + } + + int check(THD *const thd, HA_CHECK_OPT *const check_opt) override + MY_ATTRIBUTE((__warn_unused_result__)); + int remove_rows(Rdb_tbl_def *const tbl); + ha_rows records_in_range(uint inx, + const key_range *const min_key, + const key_range *const max_key, + page_range *pages) override + MY_ATTRIBUTE((__warn_unused_result__)); + + int delete_table(Rdb_tbl_def *const tbl); + int delete_table(const char *const from) override + MY_ATTRIBUTE((__warn_unused_result__)); + int create(const char *const name, TABLE *const form, + HA_CREATE_INFO *const create_info) override + MY_ATTRIBUTE((__warn_unused_result__)); + int create_table(const std::string &table_name, const TABLE *table_arg, + ulonglong auto_increment_value); + bool check_if_incompatible_data(HA_CREATE_INFO *const info, + uint table_changes) override + MY_ATTRIBUTE((__warn_unused_result__)); + + THR_LOCK_DATA **store_lock(THD *const thd, THR_LOCK_DATA **to, + enum thr_lock_type lock_type) override + MY_ATTRIBUTE((__warn_unused_result__)); + + my_bool register_query_cache_table(THD *const thd, const char *table_key, + uint key_length, + qc_engine_callback *const engine_callback, + ulonglong *const engine_data) override { + DBUG_ENTER_FUNC(); + + /* Currently, we don't support query cache */ + DBUG_RETURN(FALSE); + } + + bool get_error_message(const int error, String *const buf) override + MY_ATTRIBUTE((__nonnull__)); + + static int rdb_error_to_mysql(const rocksdb::Status &s, + const char *msg = nullptr) + 
MY_ATTRIBUTE((__warn_unused_result__)); + + void get_auto_increment(ulonglong offset, ulonglong increment, + ulonglong nb_desired_values, + ulonglong *const first_value, + ulonglong *const nb_reserved_values) override; + void update_create_info(HA_CREATE_INFO *const create_info) override; + int optimize(THD *const thd, HA_CHECK_OPT *const check_opt) override + MY_ATTRIBUTE((__warn_unused_result__)); + int analyze(THD *const thd, HA_CHECK_OPT *const check_opt) override + MY_ATTRIBUTE((__warn_unused_result__)); + + enum_alter_inplace_result check_if_supported_inplace_alter( + TABLE *altered_table, + my_core::Alter_inplace_info *const ha_alter_info) override; + + bool prepare_inplace_alter_table( + TABLE *const altered_table, + my_core::Alter_inplace_info *const ha_alter_info) override; + + bool inplace_alter_table( + TABLE *const altered_table, + my_core::Alter_inplace_info *const ha_alter_info) override; + + bool commit_inplace_alter_table( + TABLE *const altered_table, + my_core::Alter_inplace_info *const ha_alter_info, bool commit) override; + + void set_skip_unique_check_tables(const char *const whitelist); +#ifdef MARIAROCKS_NOT_YET // MDEV-10976 + bool is_read_free_rpl_table() const; +#endif + +#ifdef MARIAROCKS_NOT_YET // MDEV-10976 + public: + virtual void rpl_before_delete_rows() override; + virtual void rpl_after_delete_rows() override; + virtual void rpl_before_update_rows() override; + virtual void rpl_after_update_rows() override; + virtual bool use_read_free_rpl() const override; +#endif // MARIAROCKS_NOT_YET + + private: + /* Flags tracking if we are inside different replication operation */ + bool m_in_rpl_delete_rows; + bool m_in_rpl_update_rows; + + bool m_force_skip_unique_check; +}; + +/* + Helper class for in-place alter, for storing handler context between inplace + alter calls +*/ +struct Rdb_inplace_alter_ctx : public my_core::inplace_alter_handler_ctx { + /* The new table definition */ + Rdb_tbl_def *const m_new_tdef; + + /* Stores the 
original key definitions */ + std::shared_ptr *const m_old_key_descr; + + /* Stores the new key definitions */ + std::shared_ptr *m_new_key_descr; + + /* Stores the old number of key definitions */ + const uint m_old_n_keys; + + /* Stores the new number of key definitions */ + const uint m_new_n_keys; + + /* Stores the added key glids */ + const std::unordered_set> m_added_indexes; + + /* Stores the dropped key glids */ + const std::unordered_set m_dropped_index_ids; + + /* Stores number of keys to add */ + const uint m_n_added_keys; + + /* Stores number of keys to drop */ + const uint m_n_dropped_keys; + + /* Stores the largest current auto increment value in the index */ + const ulonglong m_max_auto_incr; + + Rdb_inplace_alter_ctx( + Rdb_tbl_def *new_tdef, std::shared_ptr *old_key_descr, + std::shared_ptr *new_key_descr, uint old_n_keys, + uint new_n_keys, + std::unordered_set> added_indexes, + std::unordered_set dropped_index_ids, uint n_added_keys, + uint n_dropped_keys, ulonglong max_auto_incr) + : my_core::inplace_alter_handler_ctx(), + m_new_tdef(new_tdef), + m_old_key_descr(old_key_descr), + m_new_key_descr(new_key_descr), + m_old_n_keys(old_n_keys), + m_new_n_keys(new_n_keys), + m_added_indexes(added_indexes), + m_dropped_index_ids(dropped_index_ids), + m_n_added_keys(n_added_keys), + m_n_dropped_keys(n_dropped_keys), + m_max_auto_incr(max_auto_incr) {} + + ~Rdb_inplace_alter_ctx() {} + + private: + /* Disable Copying */ + Rdb_inplace_alter_ctx(const Rdb_inplace_alter_ctx &); + Rdb_inplace_alter_ctx &operator=(const Rdb_inplace_alter_ctx &); +}; + +// file name indicating RocksDB data corruption +std::string rdb_corruption_marker_file_name(); + +const int MYROCKS_MARIADB_PLUGIN_MATURITY_LEVEL= MariaDB_PLUGIN_MATURITY_STABLE; + +extern bool prevent_myrocks_loading; + +void sql_print_verbose_info(const char *format, ...); + +} // namespace myrocks + diff --git a/storage/rocksdb/ha_rocksdb_proto.h b/storage/rocksdb/ha_rocksdb_proto.h new file mode 100644 
index 00000000..03d24957 --- /dev/null +++ b/storage/rocksdb/ha_rocksdb_proto.h @@ -0,0 +1,103 @@ +/* + Copyright (c) 2012,2013 Monty Program Ab + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ +#pragma once + +/* C++ standard header files */ +#include +#include + +/* MySQL header files */ +#include "./sql_string.h" + +/* RocksDB includes */ +#include "rocksdb/table.h" +#include "rocksdb/utilities/transaction_db.h" + +namespace myrocks { + +enum RDB_IO_ERROR_TYPE { + RDB_IO_ERROR_TX_COMMIT, + RDB_IO_ERROR_DICT_COMMIT, + RDB_IO_ERROR_BG_THREAD, + RDB_IO_ERROR_GENERAL, + RDB_IO_ERROR_LAST +}; + +const char *get_rdb_io_error_string(const RDB_IO_ERROR_TYPE err_type); + +void rdb_handle_io_error(const rocksdb::Status status, + const RDB_IO_ERROR_TYPE err_type) +#if defined(__clang__) + MY_ATTRIBUTE((optnone)); +#else + MY_ATTRIBUTE((noinline,noclone)); +#endif + +int rdb_normalize_tablename(const std::string &tablename, std::string *str) + MY_ATTRIBUTE((__nonnull__, __warn_unused_result__)); + +int rdb_split_normalized_tablename(const std::string &fullname, std::string *db, + std::string *table = nullptr, + std::string *partition = nullptr) + MY_ATTRIBUTE((__warn_unused_result__)); + +std::vector rdb_get_open_table_names(void); + +class Rdb_perf_counters; +int rdb_get_table_perf_counters(const char *tablename, + Rdb_perf_counters *counters) + 
MY_ATTRIBUTE((__nonnull__(2))); + +void rdb_get_global_perf_counters(Rdb_perf_counters *counters) + MY_ATTRIBUTE((__nonnull__(1))); + +void rdb_queue_save_stats_request(); + +/* + Access to singleton objects. +*/ + +rocksdb::TransactionDB *rdb_get_rocksdb_db(); + +class Rdb_cf_manager; +Rdb_cf_manager &rdb_get_cf_manager(); + +const rocksdb::BlockBasedTableOptions &rdb_get_table_options(); +bool rdb_is_ttl_enabled(); +bool rdb_is_ttl_read_filtering_enabled(); +#ifndef DBUG_OFF +int rdb_dbug_set_ttl_rec_ts(); +int rdb_dbug_set_ttl_snapshot_ts(); +int rdb_dbug_set_ttl_read_filter_ts(); +bool rdb_dbug_set_ttl_ignore_pk(); +#endif + +enum operation_type : int; +void rdb_update_global_stats(const operation_type &type, uint count, + bool is_system_table = false); + +class Rdb_dict_manager; +Rdb_dict_manager *rdb_get_dict_manager(void) + MY_ATTRIBUTE((__warn_unused_result__)); + +class Rdb_ddl_manager; +Rdb_ddl_manager *rdb_get_ddl_manager(void) + MY_ATTRIBUTE((__warn_unused_result__)); + +class Rdb_binlog_manager; +Rdb_binlog_manager *rdb_get_binlog_manager(void) + MY_ATTRIBUTE((__warn_unused_result__)); +} // namespace myrocks diff --git a/storage/rocksdb/logger.h b/storage/rocksdb/logger.h new file mode 100644 index 00000000..8902bc18 --- /dev/null +++ b/storage/rocksdb/logger.h @@ -0,0 +1,85 @@ +/* + Copyright (c) 2015, Facebook, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA */ +#pragma once + +#include +#include +#include + +namespace myrocks { + +class Rdb_logger : public rocksdb::Logger { + public: + explicit Rdb_logger(const rocksdb::InfoLogLevel log_level = + rocksdb::InfoLogLevel::ERROR_LEVEL) + : m_mysql_log_level(log_level) {} + + void Logv(const rocksdb::InfoLogLevel log_level, const char *format, + va_list ap) override { + DBUG_ASSERT(format != nullptr); + + enum loglevel mysql_log_level; + + if (m_logger) { + m_logger->Logv(log_level, format, ap); + } + + if (log_level < m_mysql_log_level) { + return; + } + + if (log_level >= rocksdb::InfoLogLevel::ERROR_LEVEL) { + mysql_log_level = ERROR_LEVEL; + } else if (log_level >= rocksdb::InfoLogLevel::WARN_LEVEL) { + mysql_log_level = WARNING_LEVEL; + } else { + mysql_log_level = INFORMATION_LEVEL; + } + + // log to MySQL + std::string f("LibRocksDB:"); + f.append(format); + error_log_print(mysql_log_level, f.c_str(), ap); + } + + void Logv(const char *format, va_list ap) override { + DBUG_ASSERT(format != nullptr); + // If no level is specified, it is by default at information level + Logv(rocksdb::InfoLogLevel::INFO_LEVEL, format, ap); + } + + void SetRocksDBLogger(const std::shared_ptr logger) { + m_logger = logger; + } + + void SetInfoLogLevel(const rocksdb::InfoLogLevel log_level) override { + // The InfoLogLevel for the logger is used by rocksdb to filter + // messages, so it needs to be the lower of the two loggers + rocksdb::InfoLogLevel base_level = log_level; + + if (m_logger && m_logger->GetInfoLogLevel() < base_level) { + base_level = m_logger->GetInfoLogLevel(); + } + rocksdb::Logger::SetInfoLogLevel(base_level); + m_mysql_log_level = log_level; + } + + private: + std::shared_ptr m_logger; + rocksdb::InfoLogLevel m_mysql_log_level; +}; + +} // namespace myrocks 
# Module-wide state: `logger` is set by init_logger(), `opts` by parse_options().
logger = None
opts = None
# File-name prefixes treated as RocksDB manifest-type files by is_manifest().
rocksdb_files = ['MANIFEST', 'CURRENT', 'OPTIONS']
rocksdb_data_suffix = '.sst'
rocksdb_wal_suffix = '.log'
# Top-level datadir files skipped when their name starts or ends with an entry.
exclude_files = ['master.info', 'relay-log.info', 'worker-relay-log.info',
                 'auto.cnf', 'gaplock.log', 'ibdata', 'ib_logfile', '.trash']
wdt_bin = 'wdt'

def is_manifest(fname):
  """Return True when `fname` starts with one of the `rocksdb_files` prefixes."""
  return any(fname.startswith(prefix) for prefix in rocksdb_files)

class Writer(object):
  """Base class for backup writers."""
  a = None
  def __init__(self):
    # Was `a = None`, which only bound a dead local variable.
    self.a = None

class StreamWriter(Writer):
  """Writes files to stdout by shelling out to `tar` or `xbstream`."""
  stream_cmd= ''

  def __init__(self, stream_option, direct = 0):
    """Select the stream command.

    stream_option: 'tar' or 'xbstream'; anything else raises Exception.
    direct: when truthy and streaming with xbstream, ' -d' is appended.
    """
    super(StreamWriter, self).__init__()
    if stream_option == 'tar':
      self.stream_cmd= 'tar chf -'
    elif stream_option == 'xbstream':
      self.stream_cmd= 'xbstream -c'
      if direct:
        self.stream_cmd = self.stream_cmd + ' -d'
    else:
      raise Exception("Only tar or xbstream is supported as streaming option.")

  def write(self, file_name):
    """Stream one file; raises Exception when the command exits non-zero."""
    # NOTE(review): file_name is passed to the shell unquoted, so names with
    # shell metacharacters or spaces would be mis-parsed.
    rc= os.system(self.stream_cmd + " " + file_name)
    if (rc != 0):
      raise Exception("Got error on stream write: " + str(rc) + " " + file_name)


class MiscFilesProcessor():
  """Walks the MySQL data directory and hands matching files to hooks.

  Subclasses override process_db() / process_file(); the base hooks do nothing.
  """
  datadir = None
  # Extensions of per-database files to process. The previous pattern wrapped
  # the list in [...], i.e. a character class, so any name containing a dot
  # followed by one of those letters matched (e.g. 'notes.txt'). A group
  # anchored at the end matches exactly the listed extensions.
  wildcard = r'.*\.(frm|MYD|MYI|MAD|MAI|MRG|TRG|TRN|ARM|ARZ|CSM|CSV|opt|par)$'
  regex = None
  start_backup_time = None
  skip_check_frm_timestamp = None

  def __init__(self, datadir, skip_check_frm_timestamp, start_backup_time):
    self.datadir = datadir
    self.regex = re.compile(self.wildcard)
    self.skip_check_frm_timestamp = skip_check_frm_timestamp
    self.start_backup_time = start_backup_time

  def process_db(self, db):
    # do nothing
    pass

  def process_file(self, path):
    # do nothing
    pass

  def check_frm_timestamp(self, fname, path):
    """Raise Exception if a .frm file was modified after the backup started.

    Skipped entirely when skip_check_frm_timestamp is set.
    """
    if self.skip_check_frm_timestamp or not fname.endswith('.frm'):
      return
    # Read the mtime once so the comparison and the log message agree.
    mtime = os.path.getmtime(path)
    if mtime > self.start_backup_time:
      time_format = '%Y-%m-%d %H:%M:%S'
      logger.error('FRM file %s was updated after starting backups. '
                   'Schema could have changed and the resulting copy may '
                   'not be valid. Aborting. '
                   '(backup time: %s, file modifled time: %s)',
                   path,
                   datetime.datetime.fromtimestamp(self.start_backup_time).strftime(time_format),
                   datetime.datetime.fromtimestamp(mtime).strftime(time_format))
      raise Exception("Inconsistent frm file timestamp")

  def process(self):
    """Traverse every database directory, then the datadir's own files.

    Changes the working directory to datadir, so the paths handed to
    process_file() are relative to it.
    """
    os.chdir(self.datadir)
    for db in self.get_databases():
      logger.info("Starting MySQL misc file traversal from database %s..", db)
      self.process_db(db)
      for f in self.get_files(db):
        if self.match(f):
          rel_path = os.path.join(db, f)
          self.check_frm_timestamp(f, rel_path)
          self.process_file(rel_path)
    logger.info("Traversing misc files from data directory..")
    for f in self.get_files(""):
      if any(f.startswith(e) or f.endswith(e) for e in exclude_files):
        logger.info("Skipping %s", f)
        continue
      self.process_file(f)

  def match(self, filename):
    """Return True when `filename` has one of the extensions in `wildcard`."""
    return self.regex.match(filename) is not None

  def get_databases(self):
    """Return datadir entries that are not regular files, dot-entries,
    sockets or the '#rocksdb' directory."""
    candidates = [d for d in os.listdir(self.datadir)
                  if not os.path.isfile(os.path.join(self.datadir, d))]
    return [db for db in candidates
            if not db.startswith('.')
            and not self._is_socket(db)
            and not db == "#rocksdb"]

  def get_files(self, db):
    """Return the names of the regular files directly inside datadir/db."""
    dbdir = self.datadir + "/" + db
    return [f for f in os.listdir(dbdir)
            if os.path.isfile(os.path.join(dbdir, f))]

  def _is_socket(self, item):
    mode = os.stat(os.path.join(self.datadir, item)).st_mode
    return bool(stat.S_ISSOCK(mode))


class MySQLBackup(MiscFilesProcessor):
  """Sends every processed file to `writer`."""
  writer = None

  def __init__(self, datadir, writer, skip_check_frm_timestamp, start_backup_time):
    MiscFilesProcessor.__init__(self, datadir, skip_check_frm_timestamp, start_backup_time)
    self.writer = writer

  def process_file(self, fname): # overriding base class
    self.writer.write(fname)


class MiscFilesLinkCreator(MiscFilesProcessor):
  """Mirrors processed files into `snapshot_dir` as hard links."""
  snapshot_dir = None

  def __init__(self, datadir, snapshot_dir, skip_check_frm_timestamp, start_backup_time):
    MiscFilesProcessor.__init__(self, datadir, skip_check_frm_timestamp, start_backup_time)
    self.snapshot_dir = snapshot_dir

  def process_db(self, db):
    snapshot_sub_dir = os.path.join(self.snapshot_dir, db)
    os.makedirs(snapshot_sub_dir)

  def process_file(self, path):
    dst_path = os.path.join(self.snapshot_dir, path)
    os.link(path, dst_path)
# RocksDB backup
class RocksDBBackup():
  """One backup round over a RocksDB checkpoint directory.

  The constructor changes the working directory to `source_dir` and records
  which .sst, WAL and manifest files are present. do_backup_until() then sends
  sst files through `writer`, deleting each one after it has been written.
  """
  source_dir = None
  writer = None
  # sst files sent in this backup round
  sent_sst = {}
  # target sst files in this backup round
  target_sst = {}
  # sst files sent in all backup rounds
  total_sent_sst= {}
  # sum of sst file size sent in this backup round
  sent_sst_size = 0
  # sum of target sst file size in this backup round
  # if sent_sst_size becomes equal to target_sst_size,
  # it means the backup round finished backing up all sst files
  target_sst_size = 0
  # sum of all sst file size sent all backup rounds
  total_sent_sst_size= 0
  # sum of all target sst file size from all backup rounds
  total_target_sst_size = 0
  show_progress_size_interval= 1073741824 # 1GB
  wal_files= []
  manifest_files= []
  finished= False

  def __init__(self, source_dir, writer, prev):
    """source_dir: checkpoint directory; writer: object with write(name);
    prev: the previous round's RocksDBBackup, or None for the first round."""
    self.source_dir = source_dir
    self.writer = writer
    os.chdir(self.source_dir)
    self.init_target_files(prev)

  def init_target_files(self, prev):
    """Reset the per-round state and classify the files in source_dir.

    An sst file is skipped when `prev` already sent a file of that name with
    the same size. File names are stat'ed relative to the working directory,
    which __init__ set to source_dir.
    """
    sst = {}
    self.sent_sst = {}
    self.target_sst= {}
    self.total_sent_sst = {}
    self.sent_sst_size = 0
    self.target_sst_size = 0
    self.total_sent_sst_size= 0
    self.total_target_sst_size= 0
    self.wal_files= []
    self.manifest_files= []

    for f in os.listdir(self.source_dir):
      if f.endswith(rocksdb_data_suffix):
        # One stat per file instead of up to three.
        size = int(os.stat(f).st_size)
        # exactly the same file (same size) was sent in previous backup rounds
        if prev is not None and f in prev.total_sent_sst and size == prev.total_sent_sst[f]:
          continue
        sst[f]= size
        self.target_sst_size = self.target_sst_size + size
      elif is_manifest(f):
        self.manifest_files.append(f)
      elif f.endswith(rocksdb_wal_suffix):
        self.wal_files.append(f)
    # Sorted by file name so the send order is deterministic.
    self.target_sst= collections.OrderedDict(sorted(sst.items()))

    if prev is not None:
      # Same dict object as prev's: the totals accumulate across rounds.
      self.total_sent_sst = prev.total_sent_sst
      self.total_sent_sst_size = prev.total_sent_sst_size
      self.total_target_sst_size = self.target_sst_size + prev.total_sent_sst_size
    else:
      self.total_target_sst_size = self.target_sst_size

  def do_backup_single(self, fname):
    """Write one file through the writer, then delete it."""
    self.writer.write(fname)
    os.remove(fname)

  def do_backup_sst(self, fname, size):
    """Send one sst file and update the per-round and overall counters."""
    self.do_backup_single(fname)
    self.sent_sst[fname]= size
    self.total_sent_sst[fname]= size
    self.sent_sst_size = self.sent_sst_size + size
    self.total_sent_sst_size = self.total_sent_sst_size + size

  def do_backup_manifest(self):
    for f in self.manifest_files:
      self.do_backup_single(f)

  def do_backup_wal(self):
    for f in self.wal_files:
      self.do_backup_single(f)

  # this is the last snapshot round. backing up all the rest files
  def do_backup_final(self):
    logger.info("Backup WAL..")
    self.do_backup_wal()
    logger.info("Backup Manifest..")
    self.do_backup_manifest()
    self.do_cleanup()
    self.finished= True

  def do_cleanup(self):
    """Remove the checkpoint directory."""
    shutil.rmtree(self.source_dir)
    logger.info("Cleaned up checkpoint from %s", self.source_dir)

  def do_backup_until(self, time_limit):
    """Send sst files until all are sent or `time_limit` seconds have passed.

    When every target sst file was sent, the WAL and manifest files follow and
    `finished` becomes True; otherwise the checkpoint is removed and the round
    ends early. Returns self.
    """
    logger.info("Starting backup from snapshot: target files %d", len(self.target_sst))
    start_time= time.time()
    last_progress_time= start_time
    progress_size= 0
    # items() works on Python 2 and 3; iteritems() exists only on Python 2.
    for fname, size in self.target_sst.items():
      self.do_backup_sst(fname, size)
      progress_size= progress_size + size
      elapsed_seconds = time.time() - start_time
      progress_seconds = time.time() - last_progress_time

      if self.should_show_progress(size):
        self.show_progress(progress_size, progress_seconds)
        progress_size=0
        last_progress_time= time.time()

      if elapsed_seconds > time_limit and not self.has_sent_all_sst():
        logger.info("Snapshot round finished. Elapsed Time: %5.2f. Remaining sst files: %d",
                    elapsed_seconds, len(self.target_sst) - len(self.sent_sst))
        self.do_cleanup()
        break
    if self.has_sent_all_sst():
      self.do_backup_final()

    return self

  def should_show_progress(self, size):
    """True when sending the last `size` bytes crossed a progress interval."""
    interval = self.show_progress_size_interval
    after = int(self.total_sent_sst_size/interval)
    before = int((self.total_sent_sst_size-size)/interval)
    return after > before

  def show_progress(self, size, seconds):
    """Log percentage done and the speed of the last `size` bytes."""
    # A zero denominator (timer granularity, or nothing to send) must not
    # abort the backup just to print a progress line.
    if self.total_target_sst_size:
      percent = self.total_sent_sst_size*100/self.total_target_sst_size
    else:
      percent = 100.0
    if seconds > 0:
      speed = size/seconds/1024/1024
    else:
      speed = 0.0
    logger.info("Backup Progress: %5.2f%% Sent %6.2f GB of %6.2f GB data, Transfer Speed: %6.2f MB/s",
                percent,
                self.total_sent_sst_size/1024/1024/1024,
                self.total_target_sst_size/1024/1024/1024,
                speed)

  def print_backup_report(self):
    logger.info("Sent %6.2f GB of sst files, %d files in total.",
                self.total_sent_sst_size/1024/1024/1024,
                len(self.total_sent_sst))

  def has_sent_all_sst(self):
    return self.sent_sst_size == self.target_sst_size


class MySQLUtil:
  """Thin helpers around a MySQLdb connection."""

  @staticmethod
  def connect(user, password, port, socket=None):
    """Connect through `socket` when given, else to 127.0.0.1:`port`."""
    if socket:
      dbh = MySQLdb.Connect(user=user,
                            passwd=password,
                            unix_socket=socket)
    else:
      dbh = MySQLdb.Connect(user=user,
                            passwd=password,
                            port=port,
                            host="127.0.0.1")
    return dbh

  @staticmethod
  def create_checkpoint(dbh, checkpoint_dir):
    """Set rocksdb_create_checkpoint to `checkpoint_dir`."""
    sql = ("SET GLOBAL rocksdb_create_checkpoint='{0}'"
           .format(checkpoint_dir))
    cur= dbh.cursor()
    cur.execute(sql)
    cur.close()

  @staticmethod
  def get_datadir(dbh):
    """Return the server's @@datadir."""
    sql = "SELECT @@datadir"
    cur = dbh.cursor()
    try:
      cur.execute(sql)
      row = cur.fetchone()
    finally:
      # The cursor used to be left open.
      cur.close()
    return row[0]

  @staticmethod
  def is_directio_enabled(dbh):
    """Return the value of @@global.rocksdb_use_direct_reads."""
    sql = "SELECT @@global.rocksdb_use_direct_reads"
    cur = dbh.cursor()
    try:
      cur.execute(sql)
      row = cur.fetchone()
    finally:
      cur.close()
    return row[0]
class BackupRunner:
  """Drives streaming (tar / xbstream) backup rounds."""
  datadir = None
  start_backup_time = None

  def __init__(self, datadir):
    self.datadir = datadir
    self.start_backup_time = time.time()

  def start_backup_round(self, backup_round, prev_backup):
    """Create a checkpoint and stream it for up to opts.checkpoint_interval
    seconds.

    Returns the RocksDBBackup of this round. On any error, or on SIGINT, the
    snapshot directory is removed (if a backup object exists) and the process
    exits with status 1.
    """
    def signal_handler(*args):
      logger.info("Got signal. Exit")
      if b is not None:
        logger.info("Cleaning up snapshot directory..")
        b.do_cleanup()
      sys.exit(1)

    b = None
    dbh = None
    try:
      signal.signal(signal.SIGINT, signal_handler)
      w = None
      if not opts.output_stream:
        raise Exception("Currently only streaming backup is supported.")

      snapshot_dir = opts.checkpoint_directory + '/' + str(backup_round)
      dbh = MySQLUtil.connect(opts.mysql_user,
                              opts.mysql_password,
                              opts.mysql_port,
                              opts.mysql_socket)
      direct = MySQLUtil.is_directio_enabled(dbh)
      logger.info("Direct I/O: %d", direct)

      w = StreamWriter(opts.output_stream, direct)

      if not self.datadir:
        self.datadir = MySQLUtil.get_datadir(dbh)
        logger.info("Set datadir: %s", self.datadir)
      logger.info("Creating checkpoint at %s", snapshot_dir)
      MySQLUtil.create_checkpoint(dbh, snapshot_dir)
      logger.info("Created checkpoint at %s", snapshot_dir)
      b = RocksDBBackup(snapshot_dir, w, prev_backup)
      return b.do_backup_until(opts.checkpoint_interval)
    except Exception as e:
      logger.error(e)
      logger.error(traceback.format_exc())
      if b is not None:
        logger.info("Cleaning up snapshot directory.")
        b.do_cleanup()
      sys.exit(1)
    finally:
      # A new connection is opened every round; without this each round
      # leaked one server connection.
      if dbh is not None:
        try:
          dbh.close()
        except Exception as e:
          logger.warning("Failed to close MySQL connection: %s", e)

  def backup_mysql(self):
    """Stream the non-RocksDB files of the data directory; exits with status 1
    on error."""
    try:
      w = None
      if opts.output_stream:
        w = StreamWriter(opts.output_stream)
      else:
        raise Exception("Currently only streaming backup is supported.")
      b = MySQLBackup(self.datadir, w, opts.skip_check_frm_timestamp,
                      self.start_backup_time)
      logger.info("Taking MySQL misc backups..")
      b.process()
      logger.info("MySQL misc backups done.")
    except Exception as e:
      logger.error(e)
      logger.error(traceback.format_exc())
      sys.exit(1)


class WDTBackup:
  """Drives backup rounds that transfer a checkpoint with WDT."""
  datadir = None
  start_backup_time = None

  def __init__(self, datadir):
    self.datadir = datadir
    self.start_backup_time = time.time()

  def cleanup(self, snapshot_dir, server_log):
    """Log and close the receiver log, then remove the snapshot directory.

    Either argument may be None. A snapshot path that does not exist is
    ignored.
    """
    if server_log:
      server_log.seek(0)
      logger.info("WDT server log:")
      logger.info(server_log.read())
      server_log.close()
    # snapshot_dir is assigned before the checkpoint is created, so it may
    # not exist yet when an early step fails; rmtree on a missing path would
    # raise from inside the error handler and mask the original failure.
    if snapshot_dir and os.path.exists(snapshot_dir):
      logger.info("Cleaning up snapshot dir %s", snapshot_dir)
      shutil.rmtree(snapshot_dir)

  def backup_with_timeout(self, backup_round):
    """Run one WDT round: checkpoint, link misc files, start the remote
    receiver, run the sender.

    Returns True when both sender and receiver exited with status 0. On any
    exception, or on SIGINT, cleans up and exits the process with status 1.
    """
    def signal_handler(*args):
      logger.info("Got signal. Exit")
      self.cleanup(snapshot_dir, server_log)
      sys.exit(1)

    logger.info("Starting backup round %d", backup_round)
    snapshot_dir = None
    server_log = None
    dbh = None
    try:
      signal.signal(signal.SIGINT, signal_handler)
      # create rocksdb snapshot
      snapshot_dir = os.path.join(opts.checkpoint_directory, str(backup_round))
      dbh = MySQLUtil.connect(opts.mysql_user,
                              opts.mysql_password,
                              opts.mysql_port,
                              opts.mysql_socket)
      logger.info("Creating checkpoint at %s", snapshot_dir)
      MySQLUtil.create_checkpoint(dbh, snapshot_dir)
      logger.info("Created checkpoint at %s", snapshot_dir)

      # get datadir if not provided
      if not self.datadir:
        self.datadir = MySQLUtil.get_datadir(dbh)
        logger.info("Set datadir: %s", self.datadir)

      # create links for misc files
      link_creator = MiscFilesLinkCreator(self.datadir, snapshot_dir,
                                          opts.skip_check_frm_timestamp,
                                          self.start_backup_time)
      link_creator.process()

      current_path = os.path.join(opts.backupdir, "CURRENT")

      # construct receiver cmd, using the data directory as recovery-id.
      # we delete the current file because it is not append-only, therefore not
      # resumable.
      remote_cmd = (
                "ssh {0} rm -f {1}; "
                "{2} -directory {3} -enable_download_resumption "
                "-recovery_id {4} -start_port 0 -abort_after_seconds {5} {6}"
          ).format(opts.destination,
                   current_path,
                   wdt_bin,
                   opts.backupdir,
                   self.datadir,
                   opts.checkpoint_interval,
                   opts.extra_wdt_receiver_options)
      logger.info("WDT remote cmd %s", remote_cmd)
      server_log = tempfile.TemporaryFile()
      remote_process = subprocess.Popen(remote_cmd.split(),
                                        stdout=subprocess.PIPE,
                                        stderr=server_log)
      wdt_url = remote_process.stdout.readline().strip()
      if not wdt_url:
        raise Exception("Unable to get connection url from wdt receiver")
      # On Python 3 the pipe yields bytes, which would be formatted into the
      # sender command as "b'...'"; on Python 2 this is already a str.
      if not isinstance(wdt_url, str):
        wdt_url = wdt_url.decode('utf-8')
      sender_cmd = (
                "{0} -connection_url \'{1}\' -directory {2} -app_name=myrocks "
                "-avg_mbytes_per_sec {3} "
                "-enable_download_resumption -abort_after_seconds {4} {5}"
          ).format(wdt_bin,
                   wdt_url,
                   snapshot_dir,
                   opts.avg_mbytes_per_sec,
                   opts.checkpoint_interval,
                   opts.extra_wdt_sender_options)
      # os.system returns the wait status; the exit code is in the high byte.
      sender_status = os.system(sender_cmd) >> 8
      remote_status = remote_process.wait()
      self.cleanup(snapshot_dir, server_log)
      # TODO: handle retryable and non-retyable errors differently
      return (sender_status == 0 and remote_status == 0)

    except Exception as e:
      logger.error(e)
      logger.error(traceback.format_exc())
      self.cleanup(snapshot_dir, server_log)
      sys.exit(1)
    finally:
      # One connection per round was opened and never closed.
      if dbh is not None:
        try:
          dbh.close()
        except Exception as e:
          logger.warning("Failed to close MySQL connection: %s", e)
def backup_using_wdt():
  """Run WDT backup rounds until one reports success.

  A round that fails faster than opts.checkpoint_interval is followed by a
  sleep for the rest of the interval. Exits with status 1 when no destination
  was given.
  """
  if not opts.destination:
    logger.error("Must provide remote destination when using WDT")
    sys.exit(1)

  # TODO: detect whether WDT is installed
  logger.info("Backing up myrocks to %s using WDT", opts.destination)
  wdt_backup = WDTBackup(opts.datadir)
  finished = False
  backup_round = 1
  while not finished:
    start_time = time.time()
    finished = wdt_backup.backup_with_timeout(backup_round)
    end_time = time.time()
    duration_seconds = end_time - start_time
    if (not finished) and (duration_seconds < opts.checkpoint_interval):
      # round finished before timeout
      sleep_duration = (opts.checkpoint_interval - duration_seconds)
      logger.info("Sleeping for %f seconds", sleep_duration)
      time.sleep(sleep_duration)

    backup_round = backup_round + 1
  logger.info("Finished myrocks backup using WDT")


def init_logger():
  """Create the module logger: INFO level, timestamped lines on stderr."""
  global logger
  logger = logging.getLogger('myrocks_hotbackup')
  logger.setLevel(logging.INFO)
  h1= logging.StreamHandler(sys.stderr)
  f = logging.Formatter("%(asctime)s.%(msecs)03d %(levelname)s %(message)s",
                        "%Y-%m-%d %H:%M:%S")
  h1.setFormatter(f)
  logger.addHandler(h1)

# Usage texts shown by the option parser (and by move_back on missing options).
backup_wdt_usage = ("Backup using WDT: myrocks_hotbackup "
                "--user=root --password=pw --stream=wdt "
                "--checkpoint_dir= --destination= --backup_dir="
                ". This has to be executed at the src "
                "host.")
backup_usage= "Backup: set -o pipefail; myrocks_hotbackup --user=root --password=pw --port=3306 --checkpoint_dir= | ssh -o NoneEnabled=yes remote_server 'tar -xi -C ' . You need to execute backup command on a server where you take backups."
move_back_usage= "Move-Back: myrocks_hotbackup --move_back --datadir= --rocksdb_datadir= --rocksdb_waldir= --backup_dir= . You need to execute move-back command on a server where backup files are sent."


def parse_options():
  """Parse sys.argv and store the resulting options object in global `opts`."""
  global opts
  parser = OptionParser(usage = "\n\n" + backup_usage + "\n\n" + \
          backup_wdt_usage + "\n\n" + move_back_usage)
  parser.add_option('-i', '--interval', type='int', dest='checkpoint_interval',
                    default=300,
                    help='Number of seconds to renew checkpoint')
  parser.add_option('-c', '--checkpoint_dir', type='string', dest='checkpoint_directory',
                    default='/data/mysql/backup/snapshot',
                    help='Local directory name where checkpoints will be created.')
  parser.add_option('-d', '--datadir', type='string', dest='datadir',
                    default=None,
                    help='backup mode: src MySQL datadir. move_back mode: dest MySQL datadir')
  parser.add_option('-s', '--stream', type='string', dest='output_stream',
                    default='tar',
                    help='Setting streaming backup options. Currently tar, WDT '
                    'and xbstream are supported. Default is tar')
  parser.add_option('--destination', type='string', dest='destination',
                    default='',
                    help='Remote server name. Only used for WDT mode so far.')
  parser.add_option('--avg_mbytes_per_sec', type='int',
                    dest='avg_mbytes_per_sec',
                    default=500,
                    help='Average backup rate in MBytes/sec. WDT only.')
  parser.add_option('--extra_wdt_sender_options', type='string',
                    dest='extra_wdt_sender_options',
                    default='',
                    help='Extra options for WDT sender')
  parser.add_option('--extra_wdt_receiver_options', type='string',
                    dest='extra_wdt_receiver_options',
                    default='',
                    help='Extra options for WDT receiver')
  parser.add_option('-u', '--user', type='string', dest='mysql_user',
                    default='root',
                    help='MySQL user name')
  parser.add_option('-p', '--password', type='string', dest='mysql_password',
                    default='',
                    help='MySQL password name')
  parser.add_option('-P', '--port', type='int', dest='mysql_port',
                    default=3306,
                    help='MySQL port number')
  parser.add_option('-S', '--socket', type='string', dest='mysql_socket',
                    default=None,
                    help='MySQL socket path. Takes precedence over --port.')
  parser.add_option('-m', '--move_back', action='store_true', dest='move_back',
                    default=False,
                    help='Moving MyRocks backup files to proper locations.')
  parser.add_option('-r', '--rocksdb_datadir', type='string', dest='rocksdb_datadir',
                    default=None,
                    help='RocksDB target data directory where backup data files will be moved. Must be empty.')
  parser.add_option('-w', '--rocksdb_waldir', type='string', dest='rocksdb_waldir',
                    default=None,
                    help='RocksDB target data directory where backup wal files will be moved. Must be empty.')
  parser.add_option('-b', '--backup_dir', type='string', dest='backupdir',
                    default=None,
                    help='backup mode for WDT: Remote directory to store '
                    'backup. move_back mode: Locations where backup '
                    'files are stored.')
  parser.add_option('-f', '--skip_check_frm_timestamp',
                    dest='skip_check_frm_timestamp',
                    action='store_true', default=False,
                    help='skipping to check if frm files are updated after starting backup.')
  parser.add_option('-D', '--debug_signal_file', type='string', dest='debug_signal_file',
                    default=None,
                    help='debugging purpose: waiting until the specified file is created')

  # Positional arguments are not used.
  opts, _ = parser.parse_args()


def create_moveback_dir(directory):
  """Create `directory` if it is missing; otherwise require it to be empty.

  Raises RuntimeError for the first entry found in an existing directory.
  """
  if not os.path.exists(directory):
    os.makedirs(directory)
  else:
    for f in os.listdir(directory):
      logger.error("Directory %s has file or directory %s!", directory, f)
      # This was a bare `raise` with no exception being handled, which only
      # fails with an unrelated "no active exception" error.
      raise RuntimeError("Directory %s is not empty (found %s)" % (directory, f))

def print_move_back_usage():
  logger.warning(move_back_usage)

def move_back():
  """Move backup files from opts.backupdir to their final locations.

  Top-level WAL files go to opts.rocksdb_waldir, sst and manifest files to
  opts.rocksdb_datadir, every other file to opts.datadir. Directories go to
  opts.datadir, except those whose name ends in '.rocksdb', which are left in
  place.
  """
  if opts.rocksdb_datadir is None or opts.rocksdb_waldir is None or opts.backupdir is None or opts.datadir is None:
    print_move_back_usage()
    # Missing required options is a failure; a bare sys.exit() reported
    # success (status 0) to the calling shell.
    sys.exit(1)
  create_moveback_dir(opts.datadir)
  create_moveback_dir(opts.rocksdb_datadir)
  create_moveback_dir(opts.rocksdb_waldir)

  # Names below are relative, so work from inside the backup directory.
  os.chdir(opts.backupdir)
  for f in os.listdir(opts.backupdir):
    if os.path.isfile(os.path.join(opts.backupdir,f)):
      if f.endswith(rocksdb_wal_suffix):
        shutil.move(f, opts.rocksdb_waldir)
      elif f.endswith(rocksdb_data_suffix) or is_manifest(f):
        shutil.move(f, opts.rocksdb_datadir)
      else:
        shutil.move(f, opts.datadir)
    else: #directory
      if f.endswith('.rocksdb'):
        continue
      shutil.move(f, opts.datadir)
runner.backup_mysql() + logger.info("All Backups Done.") + + +def main(): + parse_options() + init_logger() + + if opts.move_back is True: + move_back() + elif opts.output_stream == 'wdt': + backup_using_wdt() + else: + start_backup() + +if __name__ == "__main__": + main() diff --git a/storage/rocksdb/mysql-test/rocksdb/combinations b/storage/rocksdb/mysql-test/rocksdb/combinations new file mode 100644 index 00000000..be8080d4 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/combinations @@ -0,0 +1,5 @@ +[write_committed] +loose-rocksdb_write_policy=write_committed + +[write_prepared] +loose-rocksdb_write_policy=write_prepared diff --git a/storage/rocksdb/mysql-test/rocksdb/include/autoinc_crash_safe.inc b/storage/rocksdb/mysql-test/rocksdb/include/autoinc_crash_safe.inc new file mode 100644 index 00000000..ba2e7ace --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/include/autoinc_crash_safe.inc @@ -0,0 +1,150 @@ +--echo # +--echo # Testing concurrent transactions. +--echo # + +--source include/count_sessions.inc +connect (con1,localhost,root,,); +connect (con2,localhost,root,,); +connect (con3,localhost,root,,); + +connection con1; +begin; +insert into t values (); # 1 + +connection con2; +begin; +insert into t values (); # 2 + +connection con3; +begin; +insert into t values (); # 3 + +connection con1; +insert into t values (); # 4 + +connection con2; +insert into t values (); # 5 + +connection con3; +insert into t values (); # 6 + +connection con2; +commit; + +connection con3; +rollback; + +connection con1; +commit; + +delete from t; + +--echo # Master value before restart +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--echo # Slave value before restart +sync_slave_with_master; +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +connection slave; +--source include/stop_slave.inc +--let $rpl_server_number = 1 +--source 
include/rpl_restart_server.inc + +connection default; +--echo # Master value after restart +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--let $rpl_server_number = 2 +--source include/rpl_restart_server.inc + +connection slave; +--source include/start_slave.inc +--echo # Slave value after restart +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +disconnect con1; +disconnect con2; +disconnect con3; +--source include/wait_until_count_sessions.inc + +--echo # +--echo # Testing interaction of merge markers with various DDL statements. +--echo # +connection slave; +--source include/stop_slave.inc + +connection default; + +--echo # Drop and add primary key. +alter table t modify i int; +alter table t drop primary key; +alter table t add primary key (i); +alter table t modify i int auto_increment; + +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--echo # Remove auto_increment property. +alter table t modify i int; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--echo # Add auto_increment property. +insert into t values (123); +alter table t modify i int auto_increment; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--echo # Add column j. +alter table t add column j int; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--echo # Rename tables. 
+rename table t to t2; +rename table t2 to t; + +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--echo # Change auto_increment property +alter table t auto_increment = 1000; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +alter table t auto_increment = 1; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +alter table t drop primary key, add key (i), auto_increment = 1; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +alter table t add key (j), auto_increment = 1; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +alter table t modify i int; +alter table t add column (k int auto_increment), add key(k), auto_increment=15; +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc +select table_schema, table_name, auto_increment from information_schema.tables where table_name = 't'; + +--echo # Drop table. 
+drop table t; + +--let $rpl_server_number = 1 +--source include/rpl_restart_server.inc + +connection slave; +--source include/start_slave.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc new file mode 100644 index 00000000..3bb766d5 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load.inc @@ -0,0 +1,165 @@ +--source include/count_sessions.inc + +if ($data_order_desc) +{ + --echo Data will be ordered in descending order +} + +if (!$data_order_desc) +{ + --echo Data will be ordered in ascending order +} + +# Create a table with a primary key and one secondary key as well as one +# more column +eval CREATE TABLE t1( + pk CHAR(5), + a CHAR(30), + b CHAR(30), + PRIMARY KEY(pk) COMMENT "$pk_cf", + KEY(a) +) ENGINE=ROCKSDB COLLATE 'latin1_bin'; + +# Create a second identical table to validate that bulk loading different +# tables in the same session works +eval CREATE TABLE t2( + pk CHAR(5), + a CHAR(30), + b CHAR(30), + PRIMARY KEY(pk) COMMENT "$pk_cf", + KEY(a) +) ENGINE=ROCKSDB COLLATE 'latin1_bin'; + +# Create a third table using partitions to validate that bulk loading works +# across a partitioned table +eval CREATE TABLE t3( + pk CHAR(5), + a CHAR(30), + b CHAR(30), + PRIMARY KEY(pk) COMMENT "$pk_cf", + KEY(a) +) ENGINE=ROCKSDB COLLATE 'latin1_bin' PARTITION BY KEY() PARTITIONS 4; + +--let $file = `SELECT CONCAT(@@datadir, "test_loadfile.txt")` + +--let MTR_DATA_ORDER_DESC = $data_order_desc; + +# Create a text file with data to import into the table. 
+# The primary key is in sorted order and the secondary keys are randomly generated +--let ROCKSDB_INFILE = $file +perl; +my $fn = $ENV{'ROCKSDB_INFILE'}; +open(my $fh, '>', $fn) || die "perl open($fn): $!"; +my $max = 2500000; +my $desc = $ENV{'MTR_DATA_ORDER_DESC'}; +my @chars = ("A".."Z", "a".."z", "0".."9"); +my @powers_of_26 = (26 * 26 * 26 * 26, 26 * 26 * 26, 26 * 26, 26, 1); +for (my $ii = 0; $ii < $max; $ii++) +{ + my $pk; + my $tmp = $ii; + foreach (@powers_of_26) + { + if ($desc == 1) + { + $pk .= chr(ord('z') - int($tmp / $_)); + } + else + { + $pk .= chr(ord('a') + int($tmp / $_)); + } + + $tmp = $tmp % $_; + } + + my $num = int(rand(25)) + 6; + my $a; + $a .= $chars[rand(@chars)] for 1..$num; + + $num = int(rand(25)) + 6; + my $b; + $b .= $chars[rand(@chars)] for 1..$num; + print $fh "$pk\t$a\t$b\n"; +} +close($fh); +EOF + +--file_exists $file + +# Make sure a snapshot held by another user doesn't block the bulk load +connect (other,localhost,root,,); +set session transaction isolation level repeatable read; +start transaction with consistent snapshot; + +# Assert that there is a pending snapshot +select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; + +connection default; + +# Update CF to smaller value to create multiple SST in ingestion +eval SET @@GLOBAL.ROCKSDB_UPDATE_CF_OPTIONS= + '$pk_cf_name={write_buffer_size=8m;target_file_size_base=1m};'; + +set rocksdb_bulk_load=1; +set rocksdb_bulk_load_size=100000; +--disable_query_log +--echo LOAD DATA INFILE INTO TABLE t1; +eval LOAD DATA INFILE '$file' INTO TABLE t1; +# There should be no SST being ingested +select * from t1; +--echo LOAD DATA INFILE INTO TABLE t2; +eval LOAD DATA INFILE '$file' INTO TABLE t2; +# There should be no SST being ingested +select * from t2; +--echo LOAD DATA INFILE INTO TABLE t3; +eval LOAD DATA INFILE '$file' INTO TABLE t3; +# There should be no SST being ingested +select * from t3; +--enable_query_log +set 
rocksdb_bulk_load=0; + +--remove_file $file + +# Make sure row count index stats are correct +--replace_column 6 # 7 # 8 # 9 # 12 # 13 # +SHOW TABLE STATUS WHERE name LIKE 't%'; + +ANALYZE TABLE t1, t2, t3; + +--replace_column 6 # 7 # 8 # 9 # 12 # 13 # +SHOW TABLE STATUS WHERE name LIKE 't%'; + +# Make sure all the data is there. +select count(pk),count(a) from t1; +select count(b) from t1; +select count(pk),count(a) from t2; +select count(b) from t2; +select count(pk),count(a) from t3; +select count(b) from t3; + +# Create a dummy file with a bulk load extension. It should be removed when +# the server starts +--let $tmpext = .bulk_load.tmp +--let $MYSQLD_DATADIR= `SELECT @@datadir` +--let $datadir = $MYSQLD_DATADIR/#rocksdb +--write_file $datadir/test$tmpext +dummy data +EOF +--write_file $datadir/longfilenamethatvalidatesthatthiswillgetdeleted$tmpext +dummy data +EOF + +# Show the files exist +--list_files $datadir *$tmpext + +# Now restart the server and make sure it automatically removes this test file +--source include/restart_mysqld.inc + +# Show the files do not exist +--list_files $datadir *$tmpext + +# Cleanup +disconnect other; +DROP TABLE t1, t2, t3; + +--source include/wait_until_count_sessions.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc new file mode 100644 index 00000000..d6bee980 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/include/bulk_load_unsorted.inc @@ -0,0 +1,143 @@ +--source include/have_partition.inc +--source include/count_sessions.inc + +SET rocksdb_bulk_load_size=3; +SET rocksdb_bulk_load_allow_unsorted=1; + +### Test individual INSERTs ### + +# A table with only a PK won't have rows until the bulk load is finished +eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf") + ENGINE=ROCKSDB; +SET rocksdb_bulk_load=1; +--disable_query_log +let $sign = 1; +let $max = 5; +let $i = 1; +while ($i <= $max) { + let $a = 1
+ $sign * $i; + let $b = 1 - $sign * $i; + let $sign = -$sign; + let $insert = INSERT INTO t1 VALUES ($a, $b); + eval $insert; + inc $i; +} +--enable_query_log +SELECT * FROM t1 FORCE INDEX (PRIMARY); +SET rocksdb_bulk_load=0; +SELECT * FROM t1 FORCE INDEX (PRIMARY); +DROP TABLE t1; + +# A table with a PK and a SK shows rows immediately +eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf", KEY(b)) + ENGINE=ROCKSDB; +SET rocksdb_bulk_load=1; +--disable_query_log +let $sign = 1; +let $max = 5; +let $i = 1; +while ($i <= $max) { + let $a = 1 + $sign * $i; + let $b = 1 - $sign * $i; + let $sign = -$sign; + let $insert = INSERT INTO t1 VALUES ($a, $b); + eval $insert; + inc $i; +} +--enable_query_log + +SELECT * FROM t1 FORCE INDEX (PRIMARY); +SET rocksdb_bulk_load=0; +SELECT * FROM t1 FORCE INDEX (PRIMARY); +DROP TABLE t1; + +# Inserting into another table finishes bulk load to the previous table +eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf") + ENGINE=ROCKSDB; +eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf") + ENGINE=ROCKSDB; + +SET rocksdb_bulk_load=1; +INSERT INTO t1 VALUES (1,1); +INSERT INTO t2 VALUES (1,1); +SELECT * FROM t1 FORCE INDEX (PRIMARY); +INSERT INTO t1 VALUES (2,2); +SELECT * FROM t2 FORCE INDEX (PRIMARY); +SELECT * FROM t1 FORCE INDEX (PRIMARY); +SET rocksdb_bulk_load=0; +SELECT * FROM t1 FORCE INDEX (PRIMARY); +DROP TABLE t1, t2; + +### Test bulk load from a file ### +eval CREATE TABLE t1(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf") + ENGINE=ROCKSDB; +eval CREATE TABLE t2(a INT, b INT, PRIMARY KEY(b) COMMENT "$pk_cf") + ENGINE=ROCKSDB; +eval CREATE TABLE t3(a INT, b INT, PRIMARY KEY(a) COMMENT "$pk_cf") + ENGINE=ROCKSDB PARTITION BY KEY() PARTITIONS 4; + +--let $file = `SELECT CONCAT(@@datadir, "test_loadfile.txt")` +# Create a text file with data to import into the table. 
+# PK and SK are not in any order +--let ROCKSDB_INFILE = $file +perl; +my $fn = $ENV{'ROCKSDB_INFILE'}; +open(my $fh, '>', $fn) || die "perl open($fn): $!"; +binmode $fh; +my $max = 2500000; +my $sign = 1; +for (my $ii = 0; $ii < $max; $ii++) +{ + my $a = 1 + $sign * $ii; + my $b = 1 - $sign * $ii; + $sign = -$sign; + print $fh "$a\t$b\n"; +} +close($fh); +EOF +--file_exists $file + +# Make sure a snapshot held by another user doesn't block the bulk load +connect (other,localhost,root,,); +set session transaction isolation level repeatable read; +start transaction with consistent snapshot; + +# Assert that there is a pending snapshot +select VALUE > 0 as 'Has opened snapshots' from information_schema.rocksdb_dbstats where stat_type='DB_NUM_SNAPSHOTS'; + +connection default; +set rocksdb_bulk_load=1; +set rocksdb_bulk_load_size=100000; +--disable_query_log +--echo LOAD DATA INFILE INTO TABLE t1; +eval LOAD DATA INFILE '$file' INTO TABLE t1; +--echo LOAD DATA INFILE INTO TABLE t2; +eval LOAD DATA INFILE '$file' INTO TABLE t2; +--echo LOAD DATA INFILE INTO TABLE t3; +eval LOAD DATA INFILE '$file' INTO TABLE t3; +--enable_query_log +set rocksdb_bulk_load=0; + +--remove_file $file + +# Make sure row count index stats are correct +--replace_column 6 # 7 # 8 # 9 # 12 # 13 # +SHOW TABLE STATUS WHERE name LIKE 't%'; + +ANALYZE TABLE t1, t2, t3; + +--replace_column 6 # 7 # 8 # 9 # 12 # 13 # +SHOW TABLE STATUS WHERE name LIKE 't%'; + +# Make sure all the data is there. 
+select count(a),count(b) from t1; +select count(a),count(b) from t2; +select count(a),count(b) from t3; + +SELECT * FROM t1 FORCE INDEX (PRIMARY) LIMIT 3; +SELECT * FROM t2 FORCE INDEX (PRIMARY) LIMIT 3; + +disconnect other; +DROP TABLE t1, t2, t3; + +--source include/wait_until_count_sessions.inc diff --git a/storage/rocksdb/mysql-test/rocksdb/include/bypass_create_table.inc b/storage/rocksdb/mysql-test/rocksdb/include/bypass_create_table.inc new file mode 100644 index 00000000..233635b3 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/include/bypass_create_table.inc @@ -0,0 +1,298 @@ +CREATE TABLE `link_table` ( + `id1` bigint(20) unsigned NOT NULL DEFAULT '0' , + `id1_type` int(10) unsigned NOT NULL DEFAULT '0' , + `id2` bigint(20) unsigned NOT NULL DEFAULT '0' , + `id2_type` int(10) unsigned NOT NULL DEFAULT '0' , + `link_type` bigint(20) unsigned NOT NULL DEFAULT '0' , + `visibility` tinyint(3) NOT NULL DEFAULT '0' , + `data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' , + `time` int(10) unsigned NOT NULL DEFAULT '0' , + `version` bigint(20) unsigned NOT NULL DEFAULT '0' , + PRIMARY KEY (`link_type` , `id1` , `id2`) COMMENT 'cf_link' , + KEY `id1_type` (`id1` , `link_type` , `visibility` , `time` , `id2` , + `version` , `data`) COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; + +CREATE TABLE `link_table2` ( + `id1` bigint(20) unsigned NOT NULL DEFAULT '0' , + `id1_type` int(10) unsigned NOT NULL DEFAULT '0' , + `id2` bigint(20) unsigned NOT NULL DEFAULT '0' , + `id2_type` int(10) unsigned NOT NULL DEFAULT '0' , + `link_type` bigint(20) unsigned NOT NULL DEFAULT '0' , + `visibility` tinyint(3) NOT NULL DEFAULT '0' , + `data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '' , + `time` int(10) unsigned NOT NULL DEFAULT '0' , + `version` bigint(20) unsigned NOT NULL DEFAULT '0' , + PRIMARY KEY (`link_type` , `id1` , `id2`) + COMMENT 'cf_link' , + KEY `id1_type` 
(`id1` , `link_type` , `visibility` , `time` , `id2` , + `version` , `data`) COMMENT 'cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=9; + +insert into link_table values (1, 1, 1, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (1, 1, 2, 2, 3, 3, 'a10', 10, 125); +insert into link_table values (1, 1, 3, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (1, 1, 4, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (1, 1, 5, 2, 3, 3, 'a12', 12, 125); +insert into link_table values (1, 1, 6, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (1, 1, 7, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (1, 1, 8, 2, 3, 4, 'a13', 13, 125); +insert into link_table values (1, 1, 9, 2, 3, 4, 'a14', 14, 125); +insert into link_table values (1, 1, 10, 2, 3, 4, 'a15', 15, 125); +insert into link_table values (2, 1, 1, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 2, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 3, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 4, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 5, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 6, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 7, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 8, 2, 3, 4, 'a13', 13, 125); +insert into link_table values (2, 1, 9, 2, 3, 4, 'a14', 14, 125); +insert into link_table values (2, 1, 10, 2, 3, 4, 'a15', 15, 125); +insert into link_table values (2, 1, 1, 2, 4, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 2, 2, 4, 4, 'a10', 10, 125); +insert into link_table values (2, 1, 3, 2, 4, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 4, 2, 4, 4, 'a11', 11, 125); +insert into link_table values (2, 1, 5, 2, 4, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 6, 2, 4, 4, 'a12', 12, 125); +insert into link_table values (2, 1, 7, 2, 4, 
4, 'a12', 12, 125); +insert into link_table values (2, 1, 8, 2, 4, 4, 'a13', 13, 125); +insert into link_table values (2, 1, 9, 2, 4, 4, 'a14', 14, 125); +insert into link_table values (2, 1, 10, 2, 4, 4, 'a15', 15, 125); +insert into link_table values (3, 1, 10, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (3, 1, 9, 2, 3, 4, 'a10', 10, 125); +insert into link_table values (3, 1, 8, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (3, 1, 7, 2, 3, 4, 'a11', 11, 125); +insert into link_table values (3, 1, 6, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (3, 1, 5, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (3, 1, 4, 2, 3, 4, 'a12', 12, 125); +insert into link_table values (3, 1, 3, 2, 3, 4, 'a13', 13, 125); +insert into link_table values (3, 1, 2, 2, 3, 4, 'a14', 14, 125); +insert into link_table values (3, 1, 1, 2, 3, 4, 'a15', 15, 125); +insert into link_table values (9, 1, 9, 2, 5, 6, '0 ', 10, 125); +insert into link_table values (9, 1, 8, 2, 5, 6, '01 ', 11, 125); +insert into link_table values (9, 1, 7, 2, 5, 6, '012 ', 11, 125); +insert into link_table values (9, 1, 6, 2, 5, 6, '0123 ', 12, 125); +insert into link_table values (9, 1, 5, 2, 5, 6, '01234 ', 12, 125); +insert into link_table values (9, 1, 4, 2, 5, 6, '012345 ', 12, 125); +insert into link_table values (9, 1, 3, 2, 5, 6, '0123456 ', 13, 125); +insert into link_table values (9, 1, 2, 2, 5, 6, '01234567 ', 14, 125); +insert into link_table values (9, 1, 1, 2, 5, 6, '012345678 ', 15, 125); +insert into link_table values (9, 1, 0, 2, 5, 6, '0123456789 ', 15, 125); + +insert into link_table2 select * from link_table; + +CREATE TABLE `id_table` ( + `id` bigint(20) NOT NULL DEFAULT '0', + `type` int(11) NOT NULL DEFAULT '0', + `row_created_time` int(11) NOT NULL DEFAULT '0', + `hash_key` varchar(255) NOT NULL DEFAULT '', + `is_deleted` tinyint(4) DEFAULT '0', + PRIMARY KEY (`id`), + KEY `type_id` (`type`,`id`) +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 
ROW_FORMAT=COMPRESSED +KEY_BLOCK_SIZE=8; + +insert into id_table values (1, 1, 10, '111', 0); +insert into id_table values (2, 1, 10, '111', 1); +insert into id_table values (3, 1, 10, '111', 0); +insert into id_table values (4, 1, 10, '111', 1); +insert into id_table values (5, 1, 10, '111', 0); +insert into id_table values (6, 1, 10, '111', 1); +insert into id_table values (7, 1, 10, '111', 0); +insert into id_table values (8, 1, 10, '111', 1); +insert into id_table values (9, 1, 10, '111', 0); +insert into id_table values (10, 1, 10, '111', 1); + +CREATE TABLE `node_table` ( + `id` bigint(20) unsigned NOT NULL DEFAULT '0', + `type` int(10) unsigned NOT NULL DEFAULT '0', + `version` bigint(20) unsigned NOT NULL DEFAULT '0', + `update_time` int(10) unsigned NOT NULL DEFAULT '0', + `data` mediumtext COLLATE latin1_bin NOT NULL, + PRIMARY KEY (`type`,`id`) COMMENT 'cf_node_type_id', + KEY `id` (`id`) COMMENT 'cf_node' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; + +insert into node_table values (1, 1, 1, 10, 'data'); + +insert into node_table values (2, 1, 1, 10, 'data'); + +insert into node_table values (3, 1, 1, 10, 'data'); + +insert into node_table values (4, 1, 1, 10, 'data'); + +insert into node_table values (5, 1, 1, 10, 'data'); + +insert into node_table values (6, 1, 1, 10, 'data'); + +insert into node_table values (7, 1, 1, 10, 'data'); + +insert into node_table values (8, 1, 1, 10, 'data'); + +insert into node_table values (9, 1, 1, 10, 'data'); + +insert into node_table values (10, 1, 1, 10, 'data'); + +CREATE TABLE `count_table` ( + `id` bigint(20) unsigned NOT NULL DEFAULT '0', + `type` int(10) unsigned NOT NULL DEFAULT '0', + `link_type` bigint(20) unsigned NOT NULL DEFAULT '0', + `count` int(10) unsigned NOT NULL DEFAULT '0', + `time` int(10) unsigned NOT NULL DEFAULT '0', + `version` bigint(20) unsigned NOT NULL DEFAULT '0', + PRIMARY KEY (`id`,`link_type`) COMMENT 'cf_count_table' +) 
ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; + + +insert into count_table values (2, 1, 1, 1, 10, 20); + +insert into count_table values (3, 1, 1, 1, 10, 20); + +insert into count_table values (4, 1, 1, 1, 10, 20); + +insert into count_table values (5, 1, 1, 1, 10, 20); + +insert into count_table values (6, 1, 1, 1, 10, 20); + +insert into count_table values (7, 1, 1, 1, 10, 20); + +insert into count_table values (8, 1, 1, 1, 10, 20); + +insert into count_table values (9, 1, 1, 1, 10, 20); + +insert into count_table values (10, 1, 1, 1, 10, 20); + +CREATE TABLE `link_table5` ( + `id1` bigint(20) unsigned NOT NULL DEFAULT '0', + `id1_type` int(10) unsigned NOT NULL DEFAULT '0', + `id2` bigint(20) unsigned NOT NULL DEFAULT '0', + `id2_type` int(10) unsigned NOT NULL DEFAULT '0', + `link_type` bigint(20) unsigned NOT NULL DEFAULT '0', + `visibility` tinyint(3) NOT NULL DEFAULT '0', + `data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '', + `time` int(10) unsigned NOT NULL DEFAULT '0', + `version` bigint(20) unsigned NOT NULL DEFAULT '0', + PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; + +insert into link_table5 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1); +insert into link_table5 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1); +insert into link_table5 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1); +insert into link_table5 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1); +insert into link_table5 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1); +insert into link_table5 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1); +insert into link_table5 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1); + + +CREATE TABLE `link_table3` ( + `id1` bigint(20) unsigned NOT NULL DEFAULT '0', + `id1_type` int(10) unsigned NOT NULL DEFAULT '0', + `id2` bigint(20) unsigned NOT NULL DEFAULT '0', + `id2_type` int(10) unsigned NOT NULL DEFAULT '0', + `link_type` 
bigint(20) unsigned NOT NULL DEFAULT '0', + `visibility` tinyint(4) NOT NULL DEFAULT '0', + `data` text COLLATE latin1_bin NOT NULL, + `time` int(10) unsigned NOT NULL DEFAULT '0', + `version` bigint(20) unsigned NOT NULL DEFAULT '0', + PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link', + KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`) + COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; + +insert into link_table3 values (1, 1, 2, 2, 1, 1, 'data12', 1, 1); +insert into link_table3 values (1, 1, 3, 2, 1, 2, 'data13', 1, 1); +insert into link_table3 values (1, 1, 4, 2, 1, 2, 'data14', 1, 1); +insert into link_table3 values (1, 1, 5, 2, 1, 1, 'data15', 1, 1); +insert into link_table3 values (2, 1, 1, 2, 1, 1, 'data21', 1, 1); +insert into link_table3 values (2, 1, 2, 2, 1, 1, 'data22', 1, 1); +insert into link_table3 values (2, 1, 3, 2, 1, 1, 'data32', 1, 1); + +CREATE TABLE `link_table6` ( + `id1` bigint(20) unsigned NOT NULL DEFAULT '0', + `id1_type` int(10) unsigned NOT NULL DEFAULT '0', + `id2` bigint(20) unsigned NOT NULL DEFAULT '0', + `id2_type` int(10) unsigned NOT NULL DEFAULT '0', + `link_type` bigint(20) unsigned NOT NULL DEFAULT '0', + `visibility` tinyint(4) NOT NULL DEFAULT '0', + `data` text COLLATE latin1_bin NOT NULL, + `time` int(10) unsigned NOT NULL DEFAULT '0', + `version` bigint(20) unsigned NOT NULL DEFAULT '0', + PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link', + KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`, + `data`(255)) COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4; + +insert into link_table6 values (1, 1, 2, 2, 1, 1, + 'data12_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + 
'0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (1, 1, 3, 2, 1, 2, + 'data13_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (1, 1, 4, 2, 1, 2, + 'data14_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (1, 1, 5, 2, 1, 1, + 'data15_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (2, 1, 1, 2, 1, 1, + 'data21_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (2, 1, 2, 2, 1, 1, + 'data22_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + 
'0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); +insert into link_table6 values (2, 1, 3, 2, 1, 1, + 'data32_12345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890123456789012345678901234567890123456789' + '0123456789012345678901234567890', 1, 1); + +CREATE TABLE `link_table4` ( + `id1` binary(16) NOT NULL DEFAULT '\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0', + `raw_key` text COLLATE latin1_bin, + `id2` bigint(20) unsigned NOT NULL DEFAULT '0', + `id2_type` int(10) unsigned NOT NULL DEFAULT '0', + `link_type` bigint(20) unsigned NOT NULL DEFAULT '0', + `visibility` tinyint(3) NOT NULL DEFAULT '0', + `data` varchar(255) COLLATE latin1_bin NOT NULL DEFAULT '', + `time` int(10) unsigned NOT NULL DEFAULT '0', + `version` bigint(20) unsigned NOT NULL DEFAULT '0', + PRIMARY KEY (`link_type`,`id1`,`id2`) COMMENT 'cf_link', + KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`,`data`) + COMMENT 'rev:cf_link_id1_type' +) ENGINE=ROCKSDB DEFAULT CHARSET=latin1 COLLATE=latin1_bin +ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=8; + +insert into link_table4 values ('a1', "rk1", 2, 2, 1, 1, 'data12', 1, 1); +insert into link_table4 values ('a1', "rk2", 3, 2, 1, 2, 'data13', 1, 1); +insert into link_table4 values ('a1', "rk3", 4, 2, 1, 2, 'data14', 1, 1); +insert into link_table4 values ('a1', "rk4", 5, 2, 1, 1, 'data15', 1, 1); +insert into link_table4 values ('b1', "rk5", 1, 2, 1, 1, 'data21', 1, 1); +insert into link_table4 values ('b1', "rk6", 2, 2, 1, 1, 'data22', 1, 1); +insert into link_table4 values ('b1', "rk7", 3, 2, 1, 1, 'data32', 1, 1); diff --git a/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority.inc 
b/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority.inc new file mode 100644 index 00000000..7adca5d7 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority.inc @@ -0,0 +1,174 @@ +############################################################################### +# Common test file for high priority DDL +############################################################################### + + +create user test_user1@localhost; +grant all on test to test_user1@localhost; +create user test_user2@localhost; +grant all on test to test_user2@localhost; + +# Default values for the parameters read by ddl_high_priority_module.inc +--let $con_block = con1 +--let $con_kill = default +--let $should_kill = 1 +--let $recreate_table = 1 +--let $throw_error = 1 + +## +## killing conflicting shared locks by alter table +## + +--let $blocking_sql = lock tables t1 read; +--let $cmd = alter table t1 modify i bigint; +--let $high_priority_cmd = alter high_priority table t1 modify i bigint; + +--source include/ddl_high_priority_module.inc + +## +## killing conflicting shared lock in a transaction +## the transaction will be rolled back +## + +--let $blocking_sql = begin; insert into t1 values (4); select i from t1; +--let $cmd = alter table t1 rename t1_new; +--let $high_priority_cmd = alter high_priority table t1 rename t1_new; + +--source include/ddl_high_priority_module.inc + +select * from t1_new; +drop table t1_new; + +## +## simulate conflicting DDL which will not be killed +## + +# Simulate conflicting DDL +# This will hold MDL_SHARED_NO_READ_WRITE, which may be upgraded to exclusive +# locks to run DDLs like ALTER TABLE. +# The upgradable/exclusive lock should not be killed. + +--let $should_kill = 0 + +--let $blocking_sql = lock tables t1 write; +--let $cmd = drop table t1; +--let $high_priority_cmd = drop high_priority table t1; + +--source include/ddl_high_priority_module.inc + +# restore $should_kill +--let $should_kill = 1 + +## +## killing conflicting transaction by drop table DDL +## + +--let $blocking_sql = 
lock tables t1 read; begin; insert into t1 values (4); +--let $cmd = drop table t1; +--let $high_priority_cmd = drop high_priority table t1; + +--source include/ddl_high_priority_module.inc + +## +## no effect for regular users +## + +connect (con2,localhost,test_user2,,test,,); +# $con_kill is a regular user (con2 logs in as test_user2) +--let $con_kill = con2 +--let $should_kill = 0 + +--let $blocking_sql = lock tables t1 read; +--let $cmd = alter table t1 modify i bigint; +--let $high_priority_cmd = alter high_priority table t1 modify i bigint; + +--source include/ddl_high_priority_module.inc + +disconnect con2; + +# restore $con_kill +--let $con_kill = default +# restore $should_kill +--let $should_kill = 1 + +## +## create/drop index +## + +# create index + +--let $blocking_sql = lock tables t1 read; +--let $cmd = create index idx1 on t1 (i); +--let $high_priority_cmd = create high_priority index idx1 on t1 (i); + +--source include/ddl_high_priority_module.inc + +# drop index (reuse the table from the create-index step instead of recreating it) +--let $recreate_table = 0 + +--let $cmd = drop index idx1 on t1; +--let $high_priority_cmd = drop high_priority index idx1 on t1; + +--source include/ddl_high_priority_module.inc + +# restore $recreate_table +--let $recreate_table = 1 + +## +## high_priority truncate table +## + +--let $blocking_sql = lock tables t1 read; +--let $cmd = truncate t1; +--let $high_priority_cmd = truncate high_priority t1; + +--source include/ddl_high_priority_module.inc + +## +## high_priority create/drop trigger +## + +--let $blocking_sql = lock tables t1 read; +--let $cmd = create trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i; +--let $high_priority_cmd = create high_priority trigger ins_sum before insert on t1 for each row set @sum = @sum + new.i; + +--source include/ddl_high_priority_module.inc + +# drop trigger (reuse the table from the create-trigger step instead of recreating it) +--let $recreate_table = 0 + +--let $cmd = drop trigger ins_sum; +--let $high_priority_cmd = drop high_priority trigger ins_sum; 
+ +--source include/ddl_high_priority_module.inc + +# restore $recreate_table +--let $recreate_table = 1 + +## +## high_priority optimize table +## +## "optimize table" doesn't throw errors. It catches all errors, and +## returns a result set in a table +## + +--let $throw_error = 0 + +--let $blocking_sql = lock tables t1 read; +--let $cmd = optimize table t1; +--let $high_priority_cmd = optimize high_priority table t1; + +--source include/ddl_high_priority_module.inc + +# restore $throw_error +--let $throw_error = 1 + +## +## clean up +## + +drop user test_user1@localhost; +drop user test_user2@localhost; +--disable_warnings +drop table if exists t1; +--enable_warnings diff --git a/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority_module.inc b/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority_module.inc new file mode 100644 index 00000000..ffbdc306 --- /dev/null +++ b/storage/rocksdb/mysql-test/rocksdb/include/ddl_high_priority_module.inc @@ -0,0 +1,141 @@ +############################################################################### +# This file acts as a function/module for the ddl_high_priority test +# +# Usage: set the following variables before including +# +# $use_sys_var: whether to use a sys_var or syntax to trigger high_priority +# value: 0/1 +# +# $con_block: a blocking connection (it runs $blocking_sql) +# value: con1/con2/default +# +# $con_kill: a connection that will attempt to kill $con_block +# value: con1/con2/default +# +# $cmd: a regular command to evaluate (to use with sys var) +# value: sql command +# +# $high_priority_cmd: a high_priority command to evaluate +# value: sql command +# +# $should_kill: whether $con_block is expected to be killed +# value: 0/1 +# +# $recreate_table: whether to recreate the test table +# value: 0/1 +# +# $throw_error: whether a command will throw lock_wait_timeout error. +# Note, optimize table catches all errors. 
+# value: 0/1 +############################################################################### + +## +## Print out the parameters of the test set +## (useful for debugging) +## +--echo +--echo ## Test parameters: +--echo ## use_sys_var = $use_sys_var +--echo ## con_block = $con_block +--echo ## con_kill = $con_kill +--echo ## cmd = $cmd +--echo ## high_priority_cmd = $high_priority_cmd +--echo ## should_kill = $should_kill +--echo ## recreate_table = $recreate_table +--echo ## throw_error = $throw_error +--echo + + +## +## Setup +## + +connection default; + +# create con1 (logs in as test_user1, default database test) +connect (con1,localhost,test_user1,,test,,); + +if ($recreate_table) { + # (re)create t1 and seed it with three rows + --disable_warnings + drop table if exists t1; + --enable_warnings + create table t1 (i int); + show create table t1; + insert into t1 values (1), (2), (3); +} + +## +## Testing +## + +--echo connection: $con_block +--connection $con_block +--eval $blocking_sql + +--echo connection: $con_kill +--connection $con_kill +set lock_wait_timeout = 0.02; +set high_priority_lock_wait_timeout = 0.02; + +describe t1; + +--echo connection: default (for show processlist) +connection default; +--echo # both $con_block and $con_kill exist +--replace_column 1 3 5 6